libsanitizer: merge from master (c86b4503a94c277534ce4b9a5c015a6ac151b98a).

Martin Liska 2021-11-04 09:20:14 +01:00
parent bb27f5e9ec
commit cb0437584b
63 changed files with 1457 additions and 1300 deletions

View file

@ -1,4 +1,4 @@
fdf4c035225de52f596899931b1f6100e5e3e928
c86b4503a94c277534ce4b9a5c015a6ac151b98a
The first line of this file holds the git revision number of the
last merge done from the master library sources.

View file

@ -522,7 +522,7 @@ struct Allocator {
size > max_user_defined_malloc_size) {
if (AllocatorMayReturnNull()) {
Report("WARNING: AddressSanitizer failed to allocate 0x%zx bytes\n",
(void*)size);
size);
return nullptr;
}
uptr malloc_limit =
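
Most hunks in this merge are -Wformat cleanups in the sanitizer's internal Printf/Report calls: arguments for %p now get an explicit (void *) cast, and integer conversions such as %zx/%zd are passed the matching integer type without a pointer cast. A minimal standalone example of the pairing these changes enforce (not code from the source, just an illustration of the rule):

#include <cstddef>
#include <cstdio>

int main() {
  unsigned long addr = 0x7f00dead0000UL;  // an address stored as an integer
  std::size_t size = 4096;
  // %p must receive a real void *, and %zx must receive a size_t-compatible
  // integer; mixing them up is undefined behavior in a variadic call.
  std::printf("addr %p, size 0x%zx\n", (void *)addr, size);
  return 0;
}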

View file

@ -251,7 +251,7 @@ static void PrintAccessAndVarIntersection(const StackVarDescr &var, uptr addr,
}
str.append("'");
if (var.line > 0) {
str.append(" (line %d)", var.line);
str.append(" (line %zd)", var.line);
}
if (pos_descr) {
Decorator d;
@ -318,7 +318,8 @@ bool DescribeAddressIfGlobal(uptr addr, uptr access_size,
}
void ShadowAddressDescription::Print() const {
Printf("Address %p is located in the %s area.\n", addr, ShadowNames[kind]);
Printf("Address %p is located in the %s area.\n", (void *)addr,
ShadowNames[kind]);
}
void GlobalAddressDescription::Print(const char *bug_type) const {
@ -356,7 +357,7 @@ bool GlobalAddressDescription::PointsInsideTheSameVariable(
void StackAddressDescription::Print() const {
Decorator d;
Printf("%s", d.Location());
Printf("Address %p is located in stack of thread %s", addr,
Printf("Address %p is located in stack of thread %s", (void *)addr,
AsanThreadIdAndName(tid).c_str());
if (!frame_descr) {
@ -469,7 +470,7 @@ AddressDescription::AddressDescription(uptr addr, uptr access_size,
void WildAddressDescription::Print() const {
Printf("Address %p is a wild pointer inside of access range of size %p.\n",
addr, access_size);
(void *)addr, (void *)access_size);
}
void PrintAddressDescription(uptr addr, uptr access_size,

View file

@ -46,10 +46,9 @@ void ErrorDeadlySignal::Print() {
void ErrorDoubleFree::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: attempting %s on %p in thread %s:\n",
scariness.GetDescription(), addr_description.addr,
AsanThreadIdAndName(tid).c_str());
Report("ERROR: AddressSanitizer: attempting %s on %p in thread %s:\n",
scariness.GetDescription(), (void *)addr_description.addr,
AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
scariness.Print();
GET_STACK_TRACE_FATAL(second_free_stack->trace[0],
@ -62,10 +61,9 @@ void ErrorDoubleFree::Print() {
void ErrorNewDeleteTypeMismatch::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: %s on %p in thread %s:\n",
scariness.GetDescription(), addr_description.addr,
AsanThreadIdAndName(tid).c_str());
Report("ERROR: AddressSanitizer: %s on %p in thread %s:\n",
scariness.GetDescription(), (void *)addr_description.addr,
AsanThreadIdAndName(tid).c_str());
Printf("%s object passed to delete has wrong type:\n", d.Default());
if (delete_size != 0) {
Printf(
@ -106,7 +104,7 @@ void ErrorFreeNotMalloced::Print() {
Report(
"ERROR: AddressSanitizer: attempting free on address "
"which was not malloc()-ed: %p in thread %s\n",
addr_description.Address(), AsanThreadIdAndName(tid).c_str());
(void *)addr_description.Address(), AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
CHECK_GT(free_stack->size, 0);
scariness.Print();
@ -126,7 +124,7 @@ void ErrorAllocTypeMismatch::Print() {
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s (%s vs %s) on %p\n",
scariness.GetDescription(), alloc_names[alloc_type],
dealloc_names[dealloc_type], addr_description.Address());
dealloc_names[dealloc_type], (void *)addr_description.Address());
Printf("%s", d.Default());
CHECK_GT(dealloc_stack->size, 0);
scariness.Print();
@ -145,7 +143,7 @@ void ErrorMallocUsableSizeNotOwned::Print() {
Report(
"ERROR: AddressSanitizer: attempting to call malloc_usable_size() for "
"pointer which is not owned: %p\n",
addr_description.Address());
(void *)addr_description.Address());
Printf("%s", d.Default());
stack->Print();
addr_description.Print();
@ -158,7 +156,7 @@ void ErrorSanitizerGetAllocatedSizeNotOwned::Print() {
Report(
"ERROR: AddressSanitizer: attempting to call "
"__sanitizer_get_allocated_size() for pointer which is not owned: %p\n",
addr_description.Address());
(void *)addr_description.Address());
Printf("%s", d.Default());
stack->Print();
addr_description.Print();
@ -298,9 +296,10 @@ void ErrorStringFunctionMemoryRangesOverlap::Print() {
Report(
"ERROR: AddressSanitizer: %s: memory ranges [%p,%p) and [%p, %p) "
"overlap\n",
bug_type, addr1_description.Address(),
addr1_description.Address() + length1, addr2_description.Address(),
addr2_description.Address() + length2);
bug_type, (void *)addr1_description.Address(),
(void *)(addr1_description.Address() + length1),
(void *)addr2_description.Address(),
(void *)(addr2_description.Address() + length2));
Printf("%s", d.Default());
scariness.Print();
stack->Print();
@ -329,10 +328,10 @@ void ErrorBadParamsToAnnotateContiguousContainer::Print() {
" end : %p\n"
" old_mid : %p\n"
" new_mid : %p\n",
beg, end, old_mid, new_mid);
(void *)beg, (void *)end, (void *)old_mid, (void *)new_mid);
uptr granularity = SHADOW_GRANULARITY;
if (!IsAligned(beg, granularity))
Report("ERROR: beg is not aligned by %d\n", granularity);
Report("ERROR: beg is not aligned by %zu\n", granularity);
stack->Print();
ReportErrorSummary(scariness.GetDescription(), stack);
}
@ -341,7 +340,7 @@ void ErrorODRViolation::Print() {
Decorator d;
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s (%p):\n", scariness.GetDescription(),
global1.beg);
(void *)global1.beg);
Printf("%s", d.Default());
InternalScopedString g1_loc;
InternalScopedString g2_loc;
@ -371,7 +370,8 @@ void ErrorInvalidPointerPair::Print() {
Decorator d;
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s: %p %p\n", scariness.GetDescription(),
addr1_description.Address(), addr2_description.Address());
(void *)addr1_description.Address(),
(void *)addr2_description.Address());
Printf("%s", d.Default());
GET_STACK_TRACE_FATAL(pc, bp);
stack.Print();
@ -575,7 +575,7 @@ void ErrorGeneric::Print() {
Printf("%s", d.Error());
uptr addr = addr_description.Address();
Report("ERROR: AddressSanitizer: %s on address %p at pc %p bp %p sp %p\n",
bug_descr, (void *)addr, pc, bp, sp);
bug_descr, (void *)addr, (void *)pc, (void *)bp, (void *)sp);
Printf("%s", d.Default());
Printf("%s%s of size %zu at %p thread %s%s\n", d.Access(),

View file

@ -85,10 +85,10 @@ static void ReportGlobal(const Global &g, const char *prefix) {
Report(
"%s Global[%p]: beg=%p size=%zu/%zu name=%s module=%s dyn_init=%zu "
"odr_indicator=%p\n",
prefix, &g, (void *)g.beg, g.size, g.size_with_redzone, g.name,
prefix, (void *)&g, (void *)g.beg, g.size, g.size_with_redzone, g.name,
g.module_name, g.has_dynamic_init, (void *)g.odr_indicator);
if (g.location) {
Report(" location (%p): name=%s[%p], %d %d\n", g.location,
Report(" location (%p): name=%s[%p], %d %d\n", (void *)g.location,
g.location->filename, g.location->filename, g.location->line_no,
g.location->column_no);
}
@ -154,6 +154,23 @@ static void CheckODRViolationViaIndicator(const Global *g) {
}
}
// Check ODR violation for given global G by checking if it's already poisoned.
// We use this method in case compiler doesn't use private aliases for global
// variables.
static void CheckODRViolationViaPoisoning(const Global *g) {
if (__asan_region_is_poisoned(g->beg, g->size_with_redzone)) {
// This check may not be enough: if the first global is much larger
// the entire redzone of the second global may be within the first global.
for (ListOfGlobals *l = list_of_all_globals; l; l = l->next) {
if (g->beg == l->g->beg &&
(flags()->detect_odr_violation >= 2 || g->size != l->g->size) &&
!IsODRViolationSuppressed(g->name))
ReportODRViolation(g, FindRegistrationSite(g),
l->g, FindRegistrationSite(l->g));
}
}
}
// Clang provides two different ways for global variables protection:
// it can poison the global itself or its private alias. In former
// case we may poison same symbol multiple times, that can help us to
@ -199,6 +216,8 @@ static void RegisterGlobal(const Global *g) {
// where two globals with the same name are defined in different modules.
if (UseODRIndicator(g))
CheckODRViolationViaIndicator(g);
else
CheckODRViolationViaPoisoning(g);
}
if (CanPoisonMemory())
PoisonRedZones(*g);
@ -350,7 +369,8 @@ void __asan_register_globals(__asan_global *globals, uptr n) {
global_registration_site_vector->push_back(site);
if (flags()->report_globals >= 2) {
PRINT_CURRENT_STACK();
Printf("=== ID %d; %p %p\n", stack_id, &globals[0], &globals[n - 1]);
Printf("=== ID %d; %p %p\n", stack_id, (void *)&globals[0],
(void *)&globals[n - 1]);
}
for (uptr i = 0; i < n; i++) {
if (SANITIZER_WINDOWS && globals[i].beg == 0) {
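
The new CheckODRViolationViaPoisoning path above restores poisoning-based ODR detection for builds where the compiler does not emit private aliases (ODR indicators) for instrumented globals: if a global's red zone is already poisoned at registration time, another module has registered a global starting at the same address, and the two are compared by size and against the suppression list. A simplified sketch of that decision, using assumed helper types that are not part of the runtime (the suppression check is omitted):

#include <cstdint>
#include <vector>

struct GlobalDesc {
  uintptr_t beg;   // start address of the instrumented global
  uintptr_t size;  // size of the original variable
};

// Mirrors the check above in plain C++: a second registration for an address
// whose red zone is already poisoned, with a different size (or with
// detect_odr_violation >= 2), is treated as an ODR violation.
bool LooksLikeOdrViolation(const GlobalDesc &incoming,
                           const std::vector<GlobalDesc> &registered,
                           bool redzone_already_poisoned,
                           int detect_odr_violation) {
  if (!redzone_already_poisoned)
    return false;  // first registration covering this address range
  for (const GlobalDesc &g : registered)
    if (g.beg == incoming.beg &&
        (detect_odr_violation >= 2 || g.size != incoming.size))
      return true;
  return false;
}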

View file

@ -81,12 +81,7 @@ void InitializePlatformInterceptors();
#if ASAN_HAS_EXCEPTIONS && !SANITIZER_WINDOWS && !SANITIZER_SOLARIS && \
!SANITIZER_NETBSD
# define ASAN_INTERCEPT___CXA_THROW 1
# if ! defined(ASAN_HAS_CXA_RETHROW_PRIMARY_EXCEPTION) \
|| ASAN_HAS_CXA_RETHROW_PRIMARY_EXCEPTION
# define ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION 1
# else
# define ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION 0
# endif
# define ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION 1
# if defined(_GLIBCXX_SJLJ_EXCEPTIONS) || (SANITIZER_IOS && defined(__arm__))
# define ASAN_INTERCEPT__UNWIND_SJLJ_RAISEEXCEPTION 1
# else

View file

@ -128,8 +128,8 @@ void AsanCheckIncompatibleRT() {}
#else
static int FindFirstDSOCallback(struct dl_phdr_info *info, size_t size,
void *data) {
VReport(2, "info->dlpi_name = %s\tinfo->dlpi_addr = %p\n",
info->dlpi_name, info->dlpi_addr);
VReport(2, "info->dlpi_name = %s\tinfo->dlpi_addr = %p\n", info->dlpi_name,
(void *)info->dlpi_addr);
// Continue until the first dynamic library is found
if (!info->dlpi_name || info->dlpi_name[0] == 0)

View file

@ -165,7 +165,7 @@ static const u64 kAArch64_ShadowOffset64 = 1ULL << 36;
static const u64 kRiscv64_ShadowOffset64 = 0xd55550000;
static const u64 kMIPS32_ShadowOffset32 = 0x0aaa0000;
static const u64 kMIPS64_ShadowOffset64 = 1ULL << 37;
static const u64 kPPC64_ShadowOffset64 = 1ULL << 41;
static const u64 kPPC64_ShadowOffset64 = 1ULL << 44;
static const u64 kSystemZ_ShadowOffset64 = 1ULL << 52;
static const u64 kSPARC64_ShadowOffset64 = 1ULL << 43; // 0x80000000000
static const u64 kFreeBSD_ShadowOffset32 = 1ULL << 30; // 0x40000000
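
The constants in this hunk (including the PPC64 bump from 1ULL << 41 to 1ULL << 44) are the per-target offsets of ASan's shadow region. A short sketch, with illustrative names rather than the runtime's macros, of how such an offset is consumed: on the default shadow scale every 8 bytes of application memory map to one shadow byte at (addr >> 3) + offset.

#include <cstdint>

constexpr uint64_t kShadowScale = 3;                 // 2^3 = 8-byte granules
constexpr uint64_t kPPC64ShadowOffset = 1ULL << 44;  // value from this hunk

// Address of the shadow byte describing the 8-byte granule containing `addr`.
constexpr uint64_t MemToShadow(uint64_t addr, uint64_t shadow_offset) {
  return (addr >> kShadowScale) + shadow_offset;
}

static_assert(MemToShadow(0x1000, kPPC64ShadowOffset) == (1ULL << 44) + 0x200,
              "each 8-byte granule gets one shadow byte");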

View file

@ -66,7 +66,7 @@ void AsanPoisonOrUnpoisonIntraObjectRedzone(uptr ptr, uptr size, bool poison) {
uptr end = ptr + size;
if (Verbosity()) {
Printf("__asan_%spoison_intra_object_redzone [%p,%p) %zd\n",
poison ? "" : "un", ptr, end, size);
poison ? "" : "un", (void *)ptr, (void *)end, size);
if (Verbosity() >= 2)
PRINT_CURRENT_STACK();
}

View file

@ -67,14 +67,14 @@ static void PrintZoneForPointer(uptr ptr, uptr zone_ptr,
const char *zone_name) {
if (zone_ptr) {
if (zone_name) {
Printf("malloc_zone_from_ptr(%p) = %p, which is %s\n",
ptr, zone_ptr, zone_name);
Printf("malloc_zone_from_ptr(%p) = %p, which is %s\n", (void *)ptr,
(void *)zone_ptr, zone_name);
} else {
Printf("malloc_zone_from_ptr(%p) = %p, which doesn't have a name\n",
ptr, zone_ptr);
(void *)ptr, (void *)zone_ptr);
}
} else {
Printf("malloc_zone_from_ptr(%p) = 0\n", ptr);
Printf("malloc_zone_from_ptr(%p) = 0\n", (void *)ptr);
}
}
@ -435,9 +435,10 @@ static inline void CheckForInvalidPointerPair(void *p1, void *p2) {
void ReportMacMzReallocUnknown(uptr addr, uptr zone_ptr, const char *zone_name,
BufferedStackTrace *stack) {
ScopedInErrorReport in_report;
Printf("mz_realloc(%p) -- attempting to realloc unallocated memory.\n"
"This is an unrecoverable problem, exiting now.\n",
addr);
Printf(
"mz_realloc(%p) -- attempting to realloc unallocated memory.\n"
"This is an unrecoverable problem, exiting now.\n",
(void *)addr);
PrintZoneForPointer(addr, zone_ptr, zone_name);
stack->Print();
DescribeAddressIfHeap(addr);

View file

@ -557,7 +557,8 @@ void UnpoisonStack(uptr bottom, uptr top, const char *type) {
"False positive error reports may follow\n"
"For details see "
"https://github.com/google/sanitizers/issues/189\n",
type, top, bottom, top - bottom, top - bottom);
type, (void *)top, (void *)bottom, (void *)(top - bottom),
top - bottom);
return;
}
PoisonShadow(bottom, RoundUpTo(top - bottom, SHADOW_GRANULARITY), 0);

View file

@ -33,7 +33,7 @@ static void ProtectGap(uptr addr, uptr size) {
"protect_shadow_gap=0:"
" not protecting shadow gap, allocating gap's shadow\n"
"|| `[%p, %p]` || ShadowGap's shadow ||\n",
GapShadowBeg, GapShadowEnd);
(void*)GapShadowBeg, (void*)GapShadowEnd);
ReserveShadowMemoryRange(GapShadowBeg, GapShadowEnd,
"unprotected gap shadow");
return;
@ -113,7 +113,7 @@ void InitializeShadowMemory() {
"Shadow memory range interleaves with an existing memory mapping. "
"ASan cannot proceed correctly. ABORTING.\n");
Report("ASan shadow was supposed to be located in the [%p-%p] range.\n",
shadow_start, kHighShadowEnd);
(void*)shadow_start, (void*)kHighShadowEnd);
MaybeReportLinuxPIEBug();
DumpProcessMap();
Die();

View file

@ -254,7 +254,7 @@ void AsanThread::Init(const InitOptions *options) {
int local = 0;
VReport(1, "T%d: stack [%p,%p) size 0x%zx; local=%p\n", tid(),
(void *)stack_bottom_, (void *)stack_top_, stack_top_ - stack_bottom_,
&local);
(void *)&local);
}
// Fuchsia doesn't use ThreadStart.
@ -443,7 +443,7 @@ AsanThread *GetCurrentThread() {
void SetCurrentThread(AsanThread *t) {
CHECK(t->context());
VReport(2, "SetCurrentThread: %p for thread %p\n", t->context(),
VReport(2, "SetCurrentThread: %p for thread %p\n", (void *)t->context(),
(void *)GetThreadSelf());
// Make sure we do not reset the current AsanThread.
CHECK_EQ(0, AsanTSDGet());

View file

@ -16,6 +16,7 @@
#include "hwasan_checks.h"
#include "hwasan_dynamic_shadow.h"
#include "hwasan_globals.h"
#include "hwasan_mapping.h"
#include "hwasan_poisoning.h"
#include "hwasan_report.h"
#include "hwasan_thread.h"
@ -391,8 +392,15 @@ void __hwasan_print_shadow(const void *p, uptr sz) {
uptr shadow_last = MemToShadow(ptr_raw + sz - 1);
Printf("HWASan shadow map for %zx .. %zx (pointer tag %x)\n", ptr_raw,
ptr_raw + sz, GetTagFromPointer((uptr)p));
for (uptr s = shadow_first; s <= shadow_last; ++s)
Printf(" %zx: %x\n", ShadowToMem(s), *(tag_t *)s);
for (uptr s = shadow_first; s <= shadow_last; ++s) {
tag_t mem_tag = *reinterpret_cast<tag_t *>(s);
uptr granule_addr = ShadowToMem(s);
if (mem_tag && mem_tag < kShadowAlignment)
Printf(" %zx: %02x(%02x)\n", granule_addr, mem_tag,
*reinterpret_cast<tag_t *>(granule_addr + kShadowAlignment - 1));
else
Printf(" %zx: %02x\n", granule_addr, mem_tag);
}
}
sptr __hwasan_test_shadow(const void *p, uptr sz) {

View file

@ -702,12 +702,33 @@ void ReportTagMismatch(StackTrace *stack, uptr tagged_addr, uptr access_size,
tag_t mem_tag = *tag_ptr;
Printf("%s", d.Access());
Printf("%s of size %zu at %p tags: %02x/%02x (ptr/mem) in thread T%zd\n",
is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag,
mem_tag, t->unique_id());
if (mem_tag && mem_tag < kShadowAlignment) {
tag_t *granule_ptr = reinterpret_cast<tag_t *>((untagged_addr + offset) &
~(kShadowAlignment - 1));
// If offset is 0, (untagged_addr + offset) is not aligned to granules.
// This is the offset of the leftmost accessed byte within the bad granule.
u8 in_granule_offset = (untagged_addr + offset) & (kShadowAlignment - 1);
tag_t short_tag = granule_ptr[kShadowAlignment - 1];
// The first mismatch was a short granule that matched the ptr_tag.
if (short_tag == ptr_tag) {
// If the access starts after the end of the short granule, then the first
// bad byte is the first byte of the access; otherwise it is the first
// byte past the end of the short granule
if (mem_tag > in_granule_offset) {
offset += mem_tag - in_granule_offset;
}
}
Printf(
"%s of size %zu at %p tags: %02x/%02x(%02x) (ptr/mem) in thread T%zd\n",
is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag,
mem_tag, short_tag, t->unique_id());
} else {
Printf("%s of size %zu at %p tags: %02x/%02x (ptr/mem) in thread T%zd\n",
is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag,
mem_tag, t->unique_id());
}
if (offset != 0)
Printf("Invalid access starting at offset [%zu, %zu)\n", offset,
Min(access_size, static_cast<uptr>(offset) + (1 << kShadowScale)));
Printf("Invalid access starting at offset %zu\n", offset);
Printf("%s", d.Default());
stack->Print();
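
Both hwasan hunks above are about short granules: a shadow value in the range [1, kShadowAlignment) means only that many leading bytes of the 16-byte granule are addressable, and the granule's real tag is stored in its last byte, which is why the shadow dump and the mismatch report now print the stored short tag alongside mem_tag. A standalone sketch of how such a shadow value is interpreted (illustrative names, not the runtime's API):

#include <cstddef>
#include <cstdint>

constexpr std::size_t kGranuleSize = 16;  // hwasan's kShadowAlignment

// Decides whether an access of `size` bytes starting `offset` bytes into a
// granule is valid, given the pointer tag, the granule's shadow byte and the
// tag stored in the granule's last byte.
bool AccessAllowed(uint8_t ptr_tag, uint8_t mem_tag, uint8_t stored_tag,
                   std::size_t offset, std::size_t size) {
  if (mem_tag == ptr_tag)
    return true;                       // fully tagged granule, tags match
  if (mem_tag == 0 || mem_tag >= kGranuleSize)
    return false;                      // fully tagged granule, tags differ
  // Short granule: only the first mem_tag bytes are in bounds and the real
  // tag lives in the last byte of the granule itself.
  return stored_tag == ptr_tag && offset + size <= mem_tag;
}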

View file

@ -188,7 +188,8 @@ void ScanRangeForPointers(uptr begin, uptr end,
const char *region_type, ChunkTag tag) {
CHECK(tag == kReachable || tag == kIndirectlyLeaked);
const uptr alignment = flags()->pointer_alignment();
LOG_POINTERS("Scanning %s range %p-%p.\n", region_type, begin, end);
LOG_POINTERS("Scanning %s range %p-%p.\n", region_type, (void *)begin,
(void *)end);
uptr pp = begin;
if (pp % alignment)
pp = pp + alignment - pp % alignment;
@ -207,13 +208,15 @@ void ScanRangeForPointers(uptr begin, uptr end,
LOG_POINTERS(
"%p is poisoned: ignoring %p pointing into chunk %p-%p of size "
"%zu.\n",
pp, p, chunk, chunk + m.requested_size(), m.requested_size());
(void *)pp, p, (void *)chunk, (void *)(chunk + m.requested_size()),
m.requested_size());
continue;
}
m.set_tag(tag);
LOG_POINTERS("%p: found %p pointing into chunk %p-%p of size %zu.\n", pp, p,
chunk, chunk + m.requested_size(), m.requested_size());
LOG_POINTERS("%p: found %p pointing into chunk %p-%p of size %zu.\n",
(void *)pp, p, (void *)chunk,
(void *)(chunk + m.requested_size()), m.requested_size());
if (frontier)
frontier->push_back(chunk);
}
@ -281,7 +284,7 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
InternalMmapVector<uptr> registers;
for (uptr i = 0; i < suspended_threads.ThreadCount(); i++) {
tid_t os_id = static_cast<tid_t>(suspended_threads.GetThreadID(i));
LOG_THREADS("Processing thread %d.\n", os_id);
LOG_THREADS("Processing thread %llu.\n", os_id);
uptr stack_begin, stack_end, tls_begin, tls_end, cache_begin, cache_end;
DTLS *dtls;
bool thread_found = GetThreadRangesLocked(os_id, &stack_begin, &stack_end,
@ -290,14 +293,14 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
if (!thread_found) {
// If a thread can't be found in the thread registry, it's probably in the
// process of destruction. Log this event and move on.
LOG_THREADS("Thread %d not found in registry.\n", os_id);
LOG_THREADS("Thread %llu not found in registry.\n", os_id);
continue;
}
uptr sp;
PtraceRegistersStatus have_registers =
suspended_threads.GetRegistersAndSP(i, &registers, &sp);
if (have_registers != REGISTERS_AVAILABLE) {
Report("Unable to get registers from thread %d.\n", os_id);
Report("Unable to get registers from thread %llu.\n", os_id);
// If unable to get SP, consider the entire stack to be reachable unless
// GetRegistersAndSP failed with ESRCH.
if (have_registers == REGISTERS_UNAVAILABLE_FATAL) continue;
@ -313,7 +316,8 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
}
if (flags()->use_stacks) {
LOG_THREADS("Stack at %p-%p (SP = %p).\n", stack_begin, stack_end, sp);
LOG_THREADS("Stack at %p-%p (SP = %p).\n", (void *)stack_begin,
(void *)stack_end, (void *)sp);
if (sp < stack_begin || sp >= stack_end) {
// SP is outside the recorded stack range (e.g. the thread is running a
// signal handler on alternate stack, or swapcontext was used).
@ -327,7 +331,7 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
stack_begin += page_size;
}
LOG_THREADS("Skipped %d guard page(s) to obtain stack %p-%p.\n",
skipped, stack_begin, stack_end);
skipped, (void *)stack_begin, (void *)stack_end);
} else {
// Shrink the stack range to ignore out-of-scope values.
stack_begin = sp;
@ -339,7 +343,7 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
if (flags()->use_tls) {
if (tls_begin) {
LOG_THREADS("TLS at %p-%p.\n", tls_begin, tls_end);
LOG_THREADS("TLS at %p-%p.\n", (void *)tls_begin, (void *)tls_end);
// If the tls and cache ranges don't overlap, scan full tls range,
// otherwise, only scan the non-overlapping portions
if (cache_begin == cache_end || tls_end < cache_begin ||
@ -373,7 +377,8 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
uptr dtls_beg = dtv.beg;
uptr dtls_end = dtls_beg + dtv.size;
if (dtls_beg < dtls_end) {
LOG_THREADS("DTLS %zu at %p-%p.\n", id, dtls_beg, dtls_end);
LOG_THREADS("DTLS %d at %p-%p.\n", id, (void *)dtls_beg,
(void *)dtls_end);
ScanRangeForPointers(dtls_beg, dtls_end, frontier, "DTLS",
kReachable);
}
@ -381,7 +386,7 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
} else {
// We are handling a thread with DTLS under destruction. Log about
// this and continue.
LOG_THREADS("Thread %d has DTLS under destruction.\n", os_id);
LOG_THREADS("Thread %llu has DTLS under destruction.\n", os_id);
}
#endif
}
@ -399,8 +404,9 @@ void ScanRootRegion(Frontier *frontier, const RootRegion &root_region,
uptr intersection_end = Min(region_end, root_region.begin + root_region.size);
if (intersection_begin >= intersection_end) return;
LOG_POINTERS("Root region %p-%p intersects with mapped region %p-%p (%s)\n",
root_region.begin, root_region.begin + root_region.size,
region_begin, region_end,
(void *)root_region.begin,
(void *)(root_region.begin + root_region.size),
(void *)region_begin, (void *)region_end,
is_readable ? "readable" : "unreadable");
if (is_readable)
ScanRangeForPointers(intersection_begin, intersection_end, frontier, "ROOT",
@ -460,8 +466,8 @@ static void IgnoredSuppressedCb(uptr chunk, void *arg) {
if (idx >= suppressed.size() || m.stack_trace_id() != suppressed[idx])
return;
LOG_POINTERS("Suppressed: chunk %p-%p of size %zu.\n", chunk,
chunk + m.requested_size(), m.requested_size());
LOG_POINTERS("Suppressed: chunk %p-%p of size %zu.\n", (void *)chunk,
(void *)(chunk + m.requested_size()), m.requested_size());
m.set_tag(kIgnored);
}
@ -472,8 +478,8 @@ static void CollectIgnoredCb(uptr chunk, void *arg) {
chunk = GetUserBegin(chunk);
LsanMetadata m(chunk);
if (m.allocated() && m.tag() == kIgnored) {
LOG_POINTERS("Ignored: chunk %p-%p of size %zu.\n",
chunk, chunk + m.requested_size(), m.requested_size());
LOG_POINTERS("Ignored: chunk %p-%p of size %zu.\n", (void *)chunk,
(void *)(chunk + m.requested_size()), m.requested_size());
reinterpret_cast<Frontier *>(arg)->push_back(chunk);
}
}
@ -487,7 +493,6 @@ static uptr GetCallerPC(const StackTrace &stack) {
struct InvalidPCParam {
Frontier *frontier;
const StackDepotReverseMap *stack_depot;
bool skip_linker_allocations;
};
@ -502,7 +507,7 @@ static void MarkInvalidPCCb(uptr chunk, void *arg) {
u32 stack_id = m.stack_trace_id();
uptr caller_pc = 0;
if (stack_id > 0)
caller_pc = GetCallerPC(param->stack_depot->Get(stack_id));
caller_pc = GetCallerPC(StackDepotGet(stack_id));
// If caller_pc is unknown, this chunk may be allocated in a coroutine. Mark
// it as reachable, as we can't properly report its allocation stack anyway.
if (caller_pc == 0 || (param->skip_linker_allocations &&
@ -533,11 +538,9 @@ static void MarkInvalidPCCb(uptr chunk, void *arg) {
// which we don't care about).
// On all other platforms, this simply checks to ensure that the caller pc is
// valid before reporting chunks as leaked.
static void ProcessPC(Frontier *frontier,
const StackDepotReverseMap &stack_depot) {
static void ProcessPC(Frontier *frontier) {
InvalidPCParam arg;
arg.frontier = frontier;
arg.stack_depot = &stack_depot;
arg.skip_linker_allocations =
flags()->use_tls && flags()->use_ld_allocations && GetLinker() != nullptr;
ForEachChunk(MarkInvalidPCCb, &arg);
@ -545,7 +548,6 @@ static void ProcessPC(Frontier *frontier,
// Sets the appropriate tag on each chunk.
static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads,
const StackDepotReverseMap &stack_depot,
Frontier *frontier) {
const InternalMmapVector<u32> &suppressed_stacks =
GetSuppressionContext()->GetSortedSuppressedStacks();
@ -560,7 +562,7 @@ static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads,
FloodFillTag(frontier, kReachable);
CHECK_EQ(0, frontier->size());
ProcessPC(frontier, stack_depot);
ProcessPC(frontier);
// The check here is relatively expensive, so we do this in a separate flood
// fill. That way we can skip the check for chunks that are reachable
@ -621,8 +623,9 @@ static void ReportIfNotSuspended(ThreadContextBase *tctx, void *arg) {
if (tctx->status == ThreadStatusRunning) {
uptr i = InternalLowerBound(suspended_threads, tctx->os_id);
if (i >= suspended_threads.size() || suspended_threads[i] != tctx->os_id)
Report("Running thread %d was not suspended. False leaks are possible.\n",
tctx->os_id);
Report(
"Running thread %llu was not suspended. False leaks are possible.\n",
tctx->os_id);
}
}
@ -654,8 +657,7 @@ static void CheckForLeaksCallback(const SuspendedThreadsList &suspended_threads,
CHECK(param);
CHECK(!param->success);
ReportUnsuspendedThreads(suspended_threads);
ClassifyAllChunks(suspended_threads, param->leak_report.stack_depot(),
&param->frontier);
ClassifyAllChunks(suspended_threads, &param->frontier);
ForEachChunk(CollectLeaksCb, &param->leak_report);
// Clean up for subsequent leak checks. This assumes we did not overwrite any
// kIgnored tags.
@ -795,7 +797,7 @@ void LeakReport::AddLeakedChunk(uptr chunk, u32 stack_trace_id,
CHECK(tag == kDirectlyLeaked || tag == kIndirectlyLeaked);
if (u32 resolution = flags()->resolution) {
StackTrace stack = stack_depot_.Get(stack_trace_id);
StackTrace stack = StackDepotGet(stack_trace_id);
stack.size = Min(stack.size, resolution);
stack_trace_id = StackDepotPut(stack);
}
@ -863,7 +865,7 @@ void LeakReport::PrintReportForLeak(uptr index) {
Printf("%s", d.Default());
CHECK(leaks_[index].stack_trace_id);
stack_depot_.Get(leaks_[index].stack_trace_id).Print();
StackDepotGet(leaks_[index].stack_trace_id).Print();
if (flags()->report_objects) {
Printf("Objects leaked above:\n");
@ -876,7 +878,7 @@ void LeakReport::PrintLeakedObjectsForLeak(uptr index) {
u32 leak_id = leaks_[index].id;
for (uptr j = 0; j < leaked_objects_.size(); j++) {
if (leaked_objects_[j].leak_id == leak_id)
Printf("%p (%zu bytes)\n", leaked_objects_[j].addr,
Printf("%p (%zu bytes)\n", (void *)leaked_objects_[j].addr,
leaked_objects_[j].size);
}
}
@ -900,7 +902,7 @@ uptr LeakReport::ApplySuppressions() {
uptr new_suppressions = false;
for (uptr i = 0; i < leaks_.size(); i++) {
Suppression *s = suppressions->GetSuppressionForStack(
leaks_[i].stack_trace_id, stack_depot_.Get(leaks_[i].stack_trace_id));
leaks_[i].stack_trace_id, StackDepotGet(leaks_[i].stack_trace_id));
if (s) {
s->weight += leaks_[i].total_size;
atomic_store_relaxed(&s->hit_count, atomic_load_relaxed(&s->hit_count) +
@ -967,7 +969,7 @@ void __lsan_register_root_region(const void *begin, uptr size) {
CHECK(root_regions);
RootRegion region = {reinterpret_cast<uptr>(begin), size};
root_regions->push_back(region);
VReport(1, "Registered root region at %p of size %llu\n", begin, size);
VReport(1, "Registered root region at %p of size %zu\n", begin, size);
#endif // CAN_SANITIZE_LEAKS
}
@ -984,13 +986,13 @@ void __lsan_unregister_root_region(const void *begin, uptr size) {
uptr last_index = root_regions->size() - 1;
(*root_regions)[i] = (*root_regions)[last_index];
root_regions->pop_back();
VReport(1, "Unregistered root region at %p of size %llu\n", begin, size);
VReport(1, "Unregistered root region at %p of size %zu\n", begin, size);
break;
}
}
if (!removed) {
Report(
"__lsan_unregister_root_region(): region at %p of size %llu has not "
"__lsan_unregister_root_region(): region at %p of size %zu has not "
"been registered.\n",
begin, size);
Die();

View file

@ -108,14 +108,12 @@ class LeakReport {
uptr ApplySuppressions();
uptr UnsuppressedLeakCount();
uptr IndirectUnsuppressedLeakCount();
const StackDepotReverseMap &stack_depot() { return stack_depot_; }
private:
void PrintReportForLeak(uptr index);
void PrintLeakedObjectsForLeak(uptr index);
u32 next_id_ = 0;
StackDepotReverseMap stack_depot_;
InternalMmapVector<Leak> leaks_;
InternalMmapVector<LeakedObject> leaked_objects_;
};

View file

@ -14,6 +14,7 @@
#define SANITIZER_ALLOCATOR_H
#include "sanitizer_common.h"
#include "sanitizer_flat_map.h"
#include "sanitizer_internal_defs.h"
#include "sanitizer_lfstack.h"
#include "sanitizer_libc.h"
@ -43,12 +44,6 @@ void SetAllocatorOutOfMemory();
void PrintHintAllocatorCannotReturnNull();
// Allocators call these callbacks on mmap/munmap.
struct NoOpMapUnmapCallback {
void OnMap(uptr p, uptr size) const { }
void OnUnmap(uptr p, uptr size) const { }
};
// Callback type for iterating over chunks.
typedef void (*ForEachChunkCallback)(uptr chunk, void *arg);
@ -70,7 +65,6 @@ inline void RandomShuffle(T *a, u32 n, u32 *rand_state) {
#include "sanitizer_allocator_size_class_map.h"
#include "sanitizer_allocator_stats.h"
#include "sanitizer_allocator_primary64.h"
#include "sanitizer_allocator_bytemap.h"
#include "sanitizer_allocator_primary32.h"
#include "sanitizer_allocator_local_cache.h"
#include "sanitizer_allocator_secondary.h"

View file

@ -1,107 +0,0 @@
//===-- sanitizer_allocator_bytemap.h ---------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Part of the Sanitizer Allocator.
//
//===----------------------------------------------------------------------===//
#ifndef SANITIZER_ALLOCATOR_H
#error This file must be included inside sanitizer_allocator.h
#endif
// Maps integers in rage [0, kSize) to u8 values.
template <u64 kSize, typename AddressSpaceViewTy = LocalAddressSpaceView>
class FlatByteMap {
public:
using AddressSpaceView = AddressSpaceViewTy;
void Init() {
internal_memset(map_, 0, sizeof(map_));
}
void set(uptr idx, u8 val) {
CHECK_LT(idx, kSize);
CHECK_EQ(0U, map_[idx]);
map_[idx] = val;
}
u8 operator[] (uptr idx) {
CHECK_LT(idx, kSize);
// FIXME: CHECK may be too expensive here.
return map_[idx];
}
private:
u8 map_[kSize];
};
// TwoLevelByteMap maps integers in range [0, kSize1*kSize2) to u8 values.
// It is implemented as a two-dimensional array: array of kSize1 pointers
// to kSize2-byte arrays. The secondary arrays are mmaped on demand.
// Each value is initially zero and can be set to something else only once.
// Setting and getting values from multiple threads is safe w/o extra locking.
template <u64 kSize1, u64 kSize2,
typename AddressSpaceViewTy = LocalAddressSpaceView,
class MapUnmapCallback = NoOpMapUnmapCallback>
class TwoLevelByteMap {
public:
using AddressSpaceView = AddressSpaceViewTy;
void Init() {
internal_memset(map1_, 0, sizeof(map1_));
mu_.Init();
}
void TestOnlyUnmap() {
for (uptr i = 0; i < kSize1; i++) {
u8 *p = Get(i);
if (!p) continue;
MapUnmapCallback().OnUnmap(reinterpret_cast<uptr>(p), kSize2);
UnmapOrDie(p, kSize2);
}
}
uptr size() const { return kSize1 * kSize2; }
uptr size1() const { return kSize1; }
uptr size2() const { return kSize2; }
void set(uptr idx, u8 val) {
CHECK_LT(idx, kSize1 * kSize2);
u8 *map2 = GetOrCreate(idx / kSize2);
CHECK_EQ(0U, map2[idx % kSize2]);
map2[idx % kSize2] = val;
}
u8 operator[] (uptr idx) const {
CHECK_LT(idx, kSize1 * kSize2);
u8 *map2 = Get(idx / kSize2);
if (!map2) return 0;
auto value_ptr = AddressSpaceView::Load(&map2[idx % kSize2]);
return *value_ptr;
}
private:
u8 *Get(uptr idx) const {
CHECK_LT(idx, kSize1);
return reinterpret_cast<u8 *>(
atomic_load(&map1_[idx], memory_order_acquire));
}
u8 *GetOrCreate(uptr idx) {
u8 *res = Get(idx);
if (!res) {
SpinMutexLock l(&mu_);
if (!(res = Get(idx))) {
res = (u8*)MmapOrDie(kSize2, "TwoLevelByteMap");
MapUnmapCallback().OnMap(reinterpret_cast<uptr>(res), kSize2);
atomic_store(&map1_[idx], reinterpret_cast<uptr>(res),
memory_order_release);
}
}
return res;
}
atomic_uintptr_t map1_[kSize1];
StaticSpinMutex mu_;
};

View file

@ -198,8 +198,9 @@ class SizeClassAllocator32 {
return GetSizeClass(p) != 0;
}
uptr GetSizeClass(const void *p) {
return possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];
uptr GetSizeClass(const void *p) const {
uptr id = ComputeRegionId(reinterpret_cast<uptr>(p));
return possible_regions.contains(id) ? possible_regions[id] : 0;
}
void *GetBlockBegin(const void *p) {
@ -251,9 +252,9 @@ class SizeClassAllocator32 {
// Iterate over all existing chunks.
// The allocator must be locked when calling this function.
void ForEachChunk(ForEachChunkCallback callback, void *arg) {
void ForEachChunk(ForEachChunkCallback callback, void *arg) const {
for (uptr region = 0; region < kNumPossibleRegions; region++)
if (possible_regions[region]) {
if (possible_regions.contains(region) && possible_regions[region]) {
uptr chunk_size = ClassIdToSize(possible_regions[region]);
uptr max_chunks_in_region = kRegionSize / (chunk_size + kMetadataSize);
uptr region_beg = region * kRegionSize;
@ -305,7 +306,7 @@ class SizeClassAllocator32 {
MapUnmapCallback().OnMap(res, kRegionSize);
stat->Add(AllocatorStatMapped, kRegionSize);
CHECK(IsAligned(res, kRegionSize));
possible_regions.set(ComputeRegionId(res), static_cast<u8>(class_id));
possible_regions[ComputeRegionId(res)] = class_id;
return res;
}

View file

@ -302,9 +302,8 @@ class SizeClassAllocator64 {
UnmapWithCallbackOrDie((uptr)address_range.base(), address_range.size());
}
static void FillMemoryProfile(uptr start, uptr rss, bool file, uptr *stats,
uptr stats_size) {
for (uptr class_id = 0; class_id < stats_size; class_id++)
static void FillMemoryProfile(uptr start, uptr rss, bool file, uptr *stats) {
for (uptr class_id = 0; class_id < kNumClasses; class_id++)
if (stats[class_id] == start)
stats[class_id] = rss;
}
@ -330,7 +329,7 @@ class SizeClassAllocator64 {
uptr rss_stats[kNumClasses];
for (uptr class_id = 0; class_id < kNumClasses; class_id++)
rss_stats[class_id] = SpaceBeg() + kRegionSize * class_id;
GetMemoryProfile(FillMemoryProfile, rss_stats, kNumClasses);
GetMemoryProfile(FillMemoryProfile, rss_stats);
uptr total_mapped = 0;
uptr total_rss = 0;

View file

@ -67,6 +67,9 @@
#define NO_EXEC_STACK_DIRECTIVE
#endif
#if defined(__x86_64__) || defined(__i386__)
#if (defined(__x86_64__) || defined(__i386__)) && defined(__has_include) && __has_include(<cet.h>)
#include <cet.h>
#endif
#ifndef _CET_ENDBR
#define _CET_ENDBR
#endif

View file

@ -11,16 +11,58 @@
#include "sanitizer_chained_origin_depot.h"
#include "sanitizer_persistent_allocator.h"
#include "sanitizer_stackdepotbase.h"
namespace __sanitizer {
bool ChainedOriginDepot::ChainedOriginDepotNode::eq(
hash_type hash, const args_type &args) const {
return here_id == args.here_id && prev_id == args.prev_id;
}
namespace {
struct ChainedOriginDepotDesc {
u32 here_id;
u32 prev_id;
};
uptr ChainedOriginDepot::ChainedOriginDepotNode::storage_size(
const args_type &args) {
return sizeof(ChainedOriginDepotNode);
struct ChainedOriginDepotNode {
using hash_type = u32;
u32 link;
u32 here_id;
u32 prev_id;
typedef ChainedOriginDepotDesc args_type;
bool eq(hash_type hash, const args_type &args) const;
static uptr allocated() { return 0; }
static hash_type hash(const args_type &args);
static bool is_valid(const args_type &args);
void store(u32 id, const args_type &args, hash_type other_hash);
args_type load(u32 id) const;
struct Handle {
const ChainedOriginDepotNode *node_ = nullptr;
u32 id_ = 0;
Handle(const ChainedOriginDepotNode *node, u32 id) : node_(node), id_(id) {}
bool valid() const { return node_; }
u32 id() const { return id_; }
int here_id() const { return node_->here_id; }
int prev_id() const { return node_->prev_id; }
};
static Handle get_handle(u32 id);
typedef Handle handle_type;
};
} // namespace
static StackDepotBase<ChainedOriginDepotNode, 4, 20> depot;
bool ChainedOriginDepotNode::eq(hash_type hash, const args_type &args) const {
return here_id == args.here_id && prev_id == args.prev_id;
}
/* This is murmur2 hash for the 64->32 bit case.
@ -36,8 +78,8 @@ uptr ChainedOriginDepot::ChainedOriginDepotNode::storage_size(
split, or one of two reserved values (-1) or (-2). Either case can
dominate depending on the workload.
*/
ChainedOriginDepot::ChainedOriginDepotNode::hash_type
ChainedOriginDepot::ChainedOriginDepotNode::hash(const args_type &args) {
ChainedOriginDepotNode::hash_type ChainedOriginDepotNode::hash(
const args_type &args) {
const u32 m = 0x5bd1e995;
const u32 seed = 0x9747b28c;
const u32 r = 24;
@ -62,26 +104,21 @@ ChainedOriginDepot::ChainedOriginDepotNode::hash(const args_type &args) {
return h;
}
bool ChainedOriginDepot::ChainedOriginDepotNode::is_valid(
const args_type &args) {
return true;
}
bool ChainedOriginDepotNode::is_valid(const args_type &args) { return true; }
void ChainedOriginDepot::ChainedOriginDepotNode::store(const args_type &args,
hash_type other_hash) {
void ChainedOriginDepotNode::store(u32 id, const args_type &args,
hash_type other_hash) {
here_id = args.here_id;
prev_id = args.prev_id;
}
ChainedOriginDepot::ChainedOriginDepotNode::args_type
ChainedOriginDepot::ChainedOriginDepotNode::load() const {
ChainedOriginDepotNode::args_type ChainedOriginDepotNode::load(u32 id) const {
args_type ret = {here_id, prev_id};
return ret;
}
ChainedOriginDepot::ChainedOriginDepotNode::Handle
ChainedOriginDepot::ChainedOriginDepotNode::get_handle() {
return Handle(this);
ChainedOriginDepotNode::Handle ChainedOriginDepotNode::get_handle(u32 id) {
return Handle(&depot.nodes[id], id);
}
ChainedOriginDepot::ChainedOriginDepot() {}
@ -93,8 +130,7 @@ StackDepotStats ChainedOriginDepot::GetStats() const {
bool ChainedOriginDepot::Put(u32 here_id, u32 prev_id, u32 *new_id) {
ChainedOriginDepotDesc desc = {here_id, prev_id};
bool inserted;
ChainedOriginDepotNode::Handle h = depot.Put(desc, &inserted);
*new_id = h.valid() ? h.id() : 0;
*new_id = depot.Put(desc, &inserted);
return inserted;
}

View file

@ -13,7 +13,6 @@
#define SANITIZER_CHAINED_ORIGIN_DEPOT_H
#include "sanitizer_common.h"
#include "sanitizer_stackdepotbase.h"
namespace __sanitizer {
@ -37,49 +36,6 @@ class ChainedOriginDepot {
void UnlockAll();
private:
struct ChainedOriginDepotDesc {
u32 here_id;
u32 prev_id;
};
struct ChainedOriginDepotNode {
using hash_type = u32;
ChainedOriginDepotNode *link;
u32 id;
u32 here_id;
u32 prev_id;
typedef ChainedOriginDepotDesc args_type;
bool eq(hash_type hash, const args_type &args) const;
static uptr storage_size(const args_type &args);
static hash_type hash(const args_type &args);
static bool is_valid(const args_type &args);
void store(const args_type &args, hash_type other_hash);
args_type load() const;
struct Handle {
ChainedOriginDepotNode *node_;
Handle() : node_(nullptr) {}
explicit Handle(ChainedOriginDepotNode *node) : node_(node) {}
bool valid() { return node_; }
u32 id() { return node_->id; }
int here_id() { return node_->here_id; }
int prev_id() { return node_->prev_id; }
};
Handle get_handle();
typedef Handle handle_type;
};
StackDepotBase<ChainedOriginDepotNode, 4, 20> depot;
ChainedOriginDepot(const ChainedOriginDepot &) = delete;
void operator=(const ChainedOriginDepot &) = delete;
};

View file

@ -192,12 +192,13 @@ class ReservedAddressRange {
};
typedef void (*fill_profile_f)(uptr start, uptr rss, bool file,
/*out*/uptr *stats, uptr stats_size);
/*out*/ uptr *stats);
// Parse the contents of /proc/self/smaps and generate a memory profile.
// |cb| is a tool-specific callback that fills the |stats| array containing
// |stats_size| elements.
void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size);
// |cb| is a tool-specific callback that fills the |stats| array.
void GetMemoryProfile(fill_profile_f cb, uptr *stats);
void ParseUnixMemoryProfile(fill_profile_f cb, uptr *stats, char *smaps,
uptr smaps_len);
// Simple low-level (mmap-based) allocator for internal use. Doesn't have
// constructor, so all instances of LowLevelAllocator should be
@ -371,7 +372,7 @@ void ReportErrorSummary(const char *error_type, const AddressInfo &info,
void ReportErrorSummary(const char *error_type, const StackTrace *trace,
const char *alt_tool_name = nullptr);
void ReportMmapWriteExec(int prot);
void ReportMmapWriteExec(int prot, int mflags);
// Math
#if SANITIZER_WINDOWS && !defined(__clang__) && !defined(__GNUC__)
@ -419,9 +420,7 @@ inline uptr LeastSignificantSetBitIndex(uptr x) {
return up;
}
inline bool IsPowerOfTwo(uptr x) {
return (x & (x - 1)) == 0;
}
inline constexpr bool IsPowerOfTwo(uptr x) { return (x & (x - 1)) == 0; }
inline uptr RoundUpToPowerOfTwo(uptr size) {
CHECK(size);
@ -433,16 +432,16 @@ inline uptr RoundUpToPowerOfTwo(uptr size) {
return 1ULL << (up + 1);
}
inline uptr RoundUpTo(uptr size, uptr boundary) {
inline constexpr uptr RoundUpTo(uptr size, uptr boundary) {
RAW_CHECK(IsPowerOfTwo(boundary));
return (size + boundary - 1) & ~(boundary - 1);
}
inline uptr RoundDownTo(uptr x, uptr boundary) {
inline constexpr uptr RoundDownTo(uptr x, uptr boundary) {
return x & ~(boundary - 1);
}
inline bool IsAligned(uptr a, uptr alignment) {
inline constexpr bool IsAligned(uptr a, uptr alignment) {
return (a & (alignment - 1)) == 0;
}
@ -722,12 +721,15 @@ void SortAndDedup(Container &v, Compare comp = {}) {
v.resize(last + 1);
}
constexpr uptr kDefaultFileMaxSize = FIRST_32_SECOND_64(1 << 26, 1 << 28);
// Opens the file 'file_name" and reads up to 'max_len' bytes.
// The resulting buffer is mmaped and stored in '*buff'.
// Returns true if file was successfully opened and read.
bool ReadFileToVector(const char *file_name,
InternalMmapVectorNoCtor<char> *buff,
uptr max_len = 1 << 26, error_t *errno_p = nullptr);
uptr max_len = kDefaultFileMaxSize,
error_t *errno_p = nullptr);
// Opens the file 'file_name" and reads up to 'max_len' bytes.
// This function is less I/O efficient than ReadFileToVector as it may reread
@ -738,7 +740,7 @@ bool ReadFileToVector(const char *file_name,
// The total number of read bytes is stored in '*read_len'.
// Returns true if file was successfully opened and read.
bool ReadFileToBuffer(const char *file_name, char **buff, uptr *buff_size,
uptr *read_len, uptr max_len = 1 << 26,
uptr *read_len, uptr max_len = kDefaultFileMaxSize,
error_t *errno_p = nullptr);
// When adding a new architecture, don't forget to also update

View file

@ -2422,6 +2422,60 @@ INTERCEPTOR(int, glob64, const char *pattern, int flags,
#define INIT_GLOB64
#endif // SANITIZER_INTERCEPT_GLOB64
#if SANITIZER_INTERCEPT_POSIX_SPAWN
template <class RealSpawnPtr>
static int PosixSpawnImpl(void *ctx, RealSpawnPtr *real_posix_spawn, pid_t *pid,
const char *file_or_path, const void *file_actions,
const void *attrp, char *const argv[],
char *const envp[]) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, file_or_path,
internal_strlen(file_or_path) + 1);
if (argv) {
for (char *const *s = argv; ; ++s) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(*s));
if (!*s) break;
COMMON_INTERCEPTOR_READ_RANGE(ctx, *s, internal_strlen(*s) + 1);
}
}
if (envp) {
for (char *const *s = envp; ; ++s) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(*s));
if (!*s) break;
COMMON_INTERCEPTOR_READ_RANGE(ctx, *s, internal_strlen(*s) + 1);
}
}
int res =
real_posix_spawn(pid, file_or_path, file_actions, attrp, argv, envp);
if (res == 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pid, sizeof(*pid));
return res;
}
INTERCEPTOR(int, posix_spawn, pid_t *pid, const char *path,
const void *file_actions, const void *attrp, char *const argv[],
char *const envp[]) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, posix_spawn, pid, path, file_actions, attrp,
argv, envp);
return PosixSpawnImpl(ctx, REAL(posix_spawn), pid, path, file_actions, attrp,
argv, envp);
}
INTERCEPTOR(int, posix_spawnp, pid_t *pid, const char *file,
const void *file_actions, const void *attrp, char *const argv[],
char *const envp[]) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, posix_spawnp, pid, file, file_actions, attrp,
argv, envp);
return PosixSpawnImpl(ctx, REAL(posix_spawnp), pid, file, file_actions, attrp,
argv, envp);
}
# define INIT_POSIX_SPAWN \
COMMON_INTERCEPT_FUNCTION(posix_spawn); \
COMMON_INTERCEPT_FUNCTION(posix_spawnp);
#else // SANITIZER_INTERCEPT_POSIX_SPAWN
# define INIT_POSIX_SPAWN
#endif // SANITIZER_INTERCEPT_POSIX_SPAWN
#if SANITIZER_INTERCEPT_WAIT
// According to sys/wait.h, wait(), waitid(), waitpid() may have symbol version
// suffixes on Darwin. See the declaration of INTERCEPTOR_WITH_SUFFIX for
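
The new posix_spawn/posix_spawnp interceptors above validate, before calling the real function, that the path, the NULL-terminated argv and envp pointer arrays, and every string those arrays point to are initialized, then mark *pid as written on success. A generic sketch of that array walk, with a caller-supplied callback standing in for COMMON_INTERCEPTOR_READ_RANGE:

#include <cstddef>
#include <cstring>

// Visits each pointer slot of a NULL-terminated string array (including the
// terminating null slot) and each C string it points to, so a checker can
// require all of that memory to be initialized.
template <typename CheckRead>
void CheckStringArray(char *const *arr, CheckRead check_read) {
  if (!arr)
    return;
  for (char *const *s = arr;; ++s) {
    check_read(s, sizeof(*s));            // the pointer slot itself
    if (!*s)
      break;                              // reached the terminating null
    check_read(*s, std::strlen(*s) + 1);  // the NUL-terminated string
  }
}

In PosixSpawnImpl the same walk runs over both argv and envp, with the sanitizer's read-range check as the callback.
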
@ -2658,17 +2712,20 @@ INTERCEPTOR(int, getnameinfo, void *sockaddr, unsigned salen, char *host,
#endif
#if SANITIZER_INTERCEPT_GETSOCKNAME
INTERCEPTOR(int, getsockname, int sock_fd, void *addr, int *addrlen) {
INTERCEPTOR(int, getsockname, int sock_fd, void *addr, unsigned *addrlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getsockname, sock_fd, addr, addrlen);
COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
int addrlen_in = *addrlen;
unsigned addr_sz;
if (addrlen) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
addr_sz = *addrlen;
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getsockname)(sock_fd, addr, addrlen);
if (res == 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(addrlen_in, *addrlen));
if (!res && addr && addrlen) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(addr_sz, *addrlen));
}
return res;
}
@ -3173,13 +3230,17 @@ INTERCEPTOR(int, getpeername, int sockfd, void *addr, unsigned *addrlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpeername, sockfd, addr, addrlen);
unsigned addr_sz;
if (addrlen) addr_sz = *addrlen;
if (addrlen) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
addr_sz = *addrlen;
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getpeername)(sockfd, addr, addrlen);
if (!res && addr && addrlen)
if (!res && addr && addrlen) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(addr_sz, *addrlen));
}
return res;
}
#define INIT_GETPEERNAME COMMON_INTERCEPT_FUNCTION(getpeername);
@ -7418,7 +7479,7 @@ INTERCEPTOR(void *, mmap, void *addr, SIZE_T sz, int prot, int flags, int fd,
OFF_T off) {
void *ctx;
if (common_flags()->detect_write_exec)
ReportMmapWriteExec(prot);
ReportMmapWriteExec(prot, flags);
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return (void *)internal_mmap(addr, sz, prot, flags, fd, off);
COMMON_INTERCEPTOR_ENTER(ctx, mmap, addr, sz, prot, flags, fd, off);
@ -7428,7 +7489,7 @@ INTERCEPTOR(void *, mmap, void *addr, SIZE_T sz, int prot, int flags, int fd,
INTERCEPTOR(int, mprotect, void *addr, SIZE_T sz, int prot) {
void *ctx;
if (common_flags()->detect_write_exec)
ReportMmapWriteExec(prot);
ReportMmapWriteExec(prot, 0);
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return (int)internal_mprotect(addr, sz, prot);
COMMON_INTERCEPTOR_ENTER(ctx, mprotect, addr, sz, prot);
@ -7447,7 +7508,7 @@ INTERCEPTOR(void *, mmap64, void *addr, SIZE_T sz, int prot, int flags, int fd,
OFF64_T off) {
void *ctx;
if (common_flags()->detect_write_exec)
ReportMmapWriteExec(prot);
ReportMmapWriteExec(prot, flags);
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return (void *)internal_mmap(addr, sz, prot, flags, fd, off);
COMMON_INTERCEPTOR_ENTER(ctx, mmap64, addr, sz, prot, flags, fd, off);
@ -9033,10 +9094,10 @@ INTERCEPTOR(char *, MD2Data, const unsigned char *data, unsigned int len,
return ret; \
}
SHA2_INTERCEPTORS(224, u32);
SHA2_INTERCEPTORS(256, u32);
SHA2_INTERCEPTORS(384, u64);
SHA2_INTERCEPTORS(512, u64);
SHA2_INTERCEPTORS(224, u32)
SHA2_INTERCEPTORS(256, u32)
SHA2_INTERCEPTORS(384, u64)
SHA2_INTERCEPTORS(512, u64)
#define INIT_SHA2_INTECEPTORS(LEN) \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Init); \
@ -10229,6 +10290,7 @@ static void InitializeCommonInterceptors() {
INIT_TIME;
INIT_GLOB;
INIT_GLOB64;
INIT_POSIX_SPAWN;
INIT_WAIT;
INIT_WAIT4;
INIT_INET;

View file

@ -0,0 +1,173 @@
//===-- sanitizer_flat_map.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Part of the Sanitizer Allocator.
//
//===----------------------------------------------------------------------===//
#ifndef SANITIZER_FLAT_MAP_H
#define SANITIZER_FLAT_MAP_H
#include "sanitizer_atomic.h"
#include "sanitizer_common.h"
#include "sanitizer_internal_defs.h"
#include "sanitizer_local_address_space_view.h"
#include "sanitizer_mutex.h"
namespace __sanitizer {
// Call these callbacks on mmap/munmap.
struct NoOpMapUnmapCallback {
void OnMap(uptr p, uptr size) const {}
void OnUnmap(uptr p, uptr size) const {}
};
// Maps integers in rage [0, kSize) to values.
template <typename T, u64 kSize,
typename AddressSpaceViewTy = LocalAddressSpaceView>
class FlatMap {
public:
using AddressSpaceView = AddressSpaceViewTy;
void Init() { internal_memset(map_, 0, sizeof(map_)); }
constexpr uptr size() const { return kSize; }
bool contains(uptr idx) const {
CHECK_LT(idx, kSize);
return true;
}
T &operator[](uptr idx) {
DCHECK_LT(idx, kSize);
return map_[idx];
}
const T &operator[](uptr idx) const {
DCHECK_LT(idx, kSize);
return map_[idx];
}
private:
T map_[kSize];
};
// TwoLevelMap maps integers in range [0, kSize1*kSize2) to values.
// It is implemented as a two-dimensional array: array of kSize1 pointers
// to kSize2-byte arrays. The secondary arrays are mmaped on demand.
// Each value is initially zero and can be set to something else only once.
// Setting and getting values from multiple threads is safe w/o extra locking.
template <typename T, u64 kSize1, u64 kSize2,
typename AddressSpaceViewTy = LocalAddressSpaceView,
class MapUnmapCallback = NoOpMapUnmapCallback>
class TwoLevelMap {
static_assert(IsPowerOfTwo(kSize2), "Use a power of two for performance.");
public:
using AddressSpaceView = AddressSpaceViewTy;
void Init() {
mu_.Init();
internal_memset(map1_, 0, sizeof(map1_));
}
void TestOnlyUnmap() {
for (uptr i = 0; i < kSize1; i++) {
T *p = Get(i);
if (!p)
continue;
MapUnmapCallback().OnUnmap(reinterpret_cast<uptr>(p), MmapSize());
UnmapOrDie(p, kSize2);
}
Init();
}
uptr MemoryUsage() const {
uptr res = 0;
for (uptr i = 0; i < kSize1; i++) {
T *p = Get(i);
if (!p)
continue;
res += MmapSize();
}
return res;
}
constexpr uptr size() const { return kSize1 * kSize2; }
constexpr uptr size1() const { return kSize1; }
constexpr uptr size2() const { return kSize2; }
bool contains(uptr idx) const {
CHECK_LT(idx, kSize1 * kSize2);
return Get(idx / kSize2);
}
const T &operator[](uptr idx) const {
DCHECK_LT(idx, kSize1 * kSize2);
T *map2 = GetOrCreate(idx / kSize2);
return *AddressSpaceView::Load(&map2[idx % kSize2]);
}
T &operator[](uptr idx) {
DCHECK_LT(idx, kSize1 * kSize2);
T *map2 = GetOrCreate(idx / kSize2);
return *AddressSpaceView::LoadWritable(&map2[idx % kSize2]);
}
private:
constexpr uptr MmapSize() const {
return RoundUpTo(kSize2 * sizeof(T), GetPageSizeCached());
}
T *Get(uptr idx) const {
DCHECK_LT(idx, kSize1);
return reinterpret_cast<T *>(
atomic_load(&map1_[idx], memory_order_acquire));
}
T *GetOrCreate(uptr idx) const {
DCHECK_LT(idx, kSize1);
// This code needs to use memory_order_acquire/consume, but we use
// memory_order_relaxed for performance reasons (matters for arm64). We
// expect memory_order_relaxed to be effectively equivalent to
// memory_order_consume in this case for all relevant architectures: all
// dependent data is reachable only by dereferencing the resulting pointer.
// If relaxed load fails to see stored ptr, the code will fall back to
// Create() and reload the value again with locked mutex as a memory
// barrier.
T *res = reinterpret_cast<T *>(atomic_load_relaxed(&map1_[idx]));
if (LIKELY(res))
return res;
return Create(idx);
}
NOINLINE T *Create(uptr idx) const {
SpinMutexLock l(&mu_);
T *res = Get(idx);
if (!res) {
res = reinterpret_cast<T *>(MmapOrDie(MmapSize(), "TwoLevelMap"));
MapUnmapCallback().OnMap(reinterpret_cast<uptr>(res), kSize2);
atomic_store(&map1_[idx], reinterpret_cast<uptr>(res),
memory_order_release);
}
return res;
}
mutable StaticSpinMutex mu_;
mutable atomic_uintptr_t map1_[kSize1];
};
template <u64 kSize, typename AddressSpaceViewTy = LocalAddressSpaceView>
using FlatByteMap = FlatMap<u8, kSize, AddressSpaceViewTy>;
template <u64 kSize1, u64 kSize2,
typename AddressSpaceViewTy = LocalAddressSpaceView,
class MapUnmapCallback = NoOpMapUnmapCallback>
using TwoLevelByteMap =
TwoLevelMap<u8, kSize1, kSize2, AddressSpaceViewTy, MapUnmapCallback>;
} // namespace __sanitizer
#endif
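
The new sanitizer_flat_map.h generalizes the old byte maps to arbitrary value types: TwoLevelMap keeps a first-level array of atomic pointers and maps each second-level array the first time an index in its range is touched, while contains() lets read-only callers (such as the updated SizeClassAllocator32::GetSizeClass) avoid materializing a leaf. A standalone sketch of that shape in plain C++ (illustrative class, not the sanitizer type, using new[] where the runtime uses MmapOrDie):

#include <atomic>
#include <cstddef>
#include <mutex>

template <typename T, std::size_t kSize1, std::size_t kSize2>
class LazyTwoLevelMap {
 public:
  LazyTwoLevelMap() {
    for (auto &p : map1_) p.store(nullptr, std::memory_order_relaxed);
  }
  bool contains(std::size_t idx) const {
    return map1_[idx / kSize2].load(std::memory_order_acquire) != nullptr;
  }
  T &operator[](std::size_t idx) {
    T *leaf = map1_[idx / kSize2].load(std::memory_order_acquire);
    if (!leaf)
      leaf = Create(idx / kSize2);  // slow path: allocate the leaf once
    return leaf[idx % kSize2];
  }

 private:
  T *Create(std::size_t i) {
    std::lock_guard<std::mutex> l(mu_);
    T *leaf = map1_[i].load(std::memory_order_relaxed);
    if (!leaf) {                    // double-checked under the lock
      leaf = new T[kSize2]();       // zero-initialized, like fresh mmap pages
      map1_[i].store(leaf, std::memory_order_release);
    }
    return leaf;
  }

  std::mutex mu_;
  std::atomic<T *> map1_[kSize1];
};

Note that lookups through operator[] still create the missing leaf (the runtime's const operator[] likewise calls GetOrCreate), which is why read-only paths in the allocator now guard themselves with contains() first.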

View file

@ -372,7 +372,7 @@ bool IsAccessibleMemoryRange(uptr beg, uptr size) {
}
// FIXME implement on this platform.
void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size) {}
void GetMemoryProfile(fill_profile_f cb, uptr *stats) {}
bool ReadFileToBuffer(const char *file_name, char **buff, uptr *buff_size,
uptr *read_len, uptr max_len, error_t *errno_p) {

View file

@ -759,13 +759,9 @@ u32 GetNumberOfCPUs() {
#elif SANITIZER_SOLARIS
return sysconf(_SC_NPROCESSORS_ONLN);
#else
#if defined(CPU_COUNT)
cpu_set_t CPUs;
CHECK_EQ(sched_getaffinity(0, sizeof(cpu_set_t), &CPUs), 0);
return CPU_COUNT(&CPUs);
#else
return 1;
#endif
#endif
}

View file

@ -37,7 +37,7 @@
extern char **environ;
#endif
#if defined(__has_include) && __has_include(<os/trace.h>) && defined(__BLOCKS__)
#if defined(__has_include) && __has_include(<os/trace.h>)
#define SANITIZER_OS_TRACE 1
#include <os/trace.h>
#else
@ -70,15 +70,7 @@ extern "C" {
#include <mach/mach_time.h>
#include <mach/vm_statistics.h>
#include <malloc/malloc.h>
#if defined(__has_builtin) && __has_builtin(__builtin_os_log_format)
# include <os/log.h>
#else
/* Without support for __builtin_os_log_format, fall back to the older
method. */
# define OS_LOG_DEFAULT 0
# define os_log_error(A,B,C) \
asl_log(nullptr, nullptr, ASL_LEVEL_ERR, "%s", (C));
#endif
#include <os/log.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
@ -1319,7 +1311,7 @@ uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding,
}
// FIXME implement on this platform.
void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size) { }
void GetMemoryProfile(fill_profile_f cb, uptr *stats) {}
void SignalContext::DumpAllRegisters(void *context) {
Report("Register values:\n");

View file

@ -14,26 +14,6 @@
#include "sanitizer_common.h"
#include "sanitizer_platform.h"
/* TARGET_OS_OSX is not present in SDKs before Darwin16 (macOS 10.12) use
TARGET_OS_MAC (we have no support for iOS in any form for these versions,
so there's no ambiguity). */
#if !defined(TARGET_OS_OSX) && TARGET_OS_MAC
# define TARGET_OS_OSX 1
#endif
/* Other TARGET_OS_xxx are not present on earlier versions, define them to
0 (we have no support for them; they are not valid targets anyway). */
#ifndef TARGET_OS_IOS
#define TARGET_OS_IOS 0
#endif
#ifndef TARGET_OS_TV
#define TARGET_OS_TV 0
#endif
#ifndef TARGET_OS_WATCH
#define TARGET_OS_WATCH 0
#endif
#if SANITIZER_MAC
#include "sanitizer_posix.h"

View file

@ -174,7 +174,7 @@ struct InternalDeadlockDetector {
if (max_idx != MutexInvalid && !mutex_can_lock[max_idx][type]) {
Printf("%s: internal deadlock: can't lock %s under %s mutex\n", SanitizerToolName,
mutex_meta[type].name, mutex_meta[max_idx].name);
PrintMutexPC(pc);
PrintMutexPC(locked[max_idx].pc);
CHECK(0);
}
locked[type].seq = ++sequence;

View file

@ -95,8 +95,11 @@ enum {
// Go linker does not support THREADLOCAL variables,
// so we can't use per-thread state.
// Disable checked locks on Darwin. Although Darwin platforms support
// THREADLOCAL variables they are not usable early on during process init when
// `__sanitizer::Mutex` is used.
#define SANITIZER_CHECK_DEADLOCKS \
(SANITIZER_DEBUG && !SANITIZER_GO && SANITIZER_SUPPORTS_THREADLOCAL)
(SANITIZER_DEBUG && !SANITIZER_GO && SANITIZER_SUPPORTS_THREADLOCAL && !SANITIZER_MAC)
#if SANITIZER_CHECK_DEADLOCKS
struct MutexMeta {

View file

@ -1,18 +0,0 @@
//===-- sanitizer_persistent_allocator.cpp ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is shared between AddressSanitizer and ThreadSanitizer
// run-time libraries.
//===----------------------------------------------------------------------===//
#include "sanitizer_persistent_allocator.h"
namespace __sanitizer {
PersistentAllocator thePersistentAllocator;
} // namespace __sanitizer

View file

@ -20,50 +20,89 @@
namespace __sanitizer {
template <typename T>
class PersistentAllocator {
public:
void *alloc(uptr size);
T *alloc(uptr count = 1);
uptr allocated() const { return atomic_load_relaxed(&mapped_size); }
void TestOnlyUnmap();
private:
void *tryAlloc(uptr size);
StaticSpinMutex mtx; // Protects alloc of new blocks for region allocator.
T *tryAlloc(uptr count);
T *refillAndAlloc(uptr count);
mutable StaticSpinMutex mtx; // Protects alloc of new blocks.
atomic_uintptr_t region_pos; // Region allocator for Node's.
atomic_uintptr_t region_end;
atomic_uintptr_t mapped_size;
struct BlockInfo {
const BlockInfo *next;
uptr ptr;
uptr size;
};
const BlockInfo *curr;
};
inline void *PersistentAllocator::tryAlloc(uptr size) {
template <typename T>
inline T *PersistentAllocator<T>::tryAlloc(uptr count) {
// Optimistic lock-free allocation; essentially try to bump the region ptr.
for (;;) {
uptr cmp = atomic_load(&region_pos, memory_order_acquire);
uptr end = atomic_load(&region_end, memory_order_acquire);
if (cmp == 0 || cmp + size > end) return nullptr;
uptr size = count * sizeof(T);
if (cmp == 0 || cmp + size > end)
return nullptr;
if (atomic_compare_exchange_weak(&region_pos, &cmp, cmp + size,
memory_order_acquire))
return (void *)cmp;
return reinterpret_cast<T *>(cmp);
}
}
inline void *PersistentAllocator::alloc(uptr size) {
template <typename T>
inline T *PersistentAllocator<T>::alloc(uptr count) {
// First, try to allocate optimistically.
void *s = tryAlloc(size);
if (s) return s;
T *s = tryAlloc(count);
if (LIKELY(s))
return s;
return refillAndAlloc(count);
}
template <typename T>
inline T *PersistentAllocator<T>::refillAndAlloc(uptr count) {
// If failed, lock, retry and alloc new superblock.
SpinMutexLock l(&mtx);
for (;;) {
s = tryAlloc(size);
if (s) return s;
T *s = tryAlloc(count);
if (s)
return s;
atomic_store(&region_pos, 0, memory_order_relaxed);
uptr allocsz = 64 * 1024;
if (allocsz < size) allocsz = size;
uptr size = count * sizeof(T) + sizeof(BlockInfo);
uptr allocsz = RoundUpTo(Max<uptr>(size, 64u * 1024u), GetPageSizeCached());
uptr mem = (uptr)MmapOrDie(allocsz, "stack depot");
BlockInfo *new_block = (BlockInfo *)(mem + allocsz) - 1;
new_block->next = curr;
new_block->ptr = mem;
new_block->size = allocsz;
curr = new_block;
atomic_fetch_add(&mapped_size, allocsz, memory_order_relaxed);
allocsz -= sizeof(BlockInfo);
atomic_store(&region_end, mem + allocsz, memory_order_release);
atomic_store(&region_pos, mem, memory_order_release);
}
}
extern PersistentAllocator thePersistentAllocator;
inline void *PersistentAlloc(uptr sz) {
return thePersistentAllocator.alloc(sz);
template <typename T>
void PersistentAllocator<T>::TestOnlyUnmap() {
while (curr) {
uptr mem = curr->ptr;
uptr allocsz = curr->size;
curr = curr->next;
UnmapOrDie((void *)mem, allocsz);
}
internal_memset(this, 0, sizeof(*this));
}
} // namespace __sanitizer
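For illustration only (not part of the merged sources): the optimistic bump-pointer scheme used by PersistentAllocator above, reduced to a self-contained sketch with std::atomic and std::mutex. BumpArena, TryAlloc, and Refill are hypothetical names; malloc stands in for MmapOrDie, blocks are intentionally never freed, and alignment is not handled.
#include <atomic>
#include <cstdint>
#include <cstdlib>
#include <mutex>
class BumpArena {
 public:
  void *Alloc(std::size_t size) {
    if (void *p = TryAlloc(size))   // optimistic lock-free fast path
      return p;
    return Refill(size);            // slow path takes the lock
  }
 private:
  void *TryAlloc(std::size_t size) {
    // Try to bump the region pointer with a weak CAS; fail if exhausted.
    for (;;) {
      std::uintptr_t cur = pos_.load(std::memory_order_acquire);
      std::uintptr_t end = end_.load(std::memory_order_acquire);
      if (cur == 0 || cur + size > end)
        return nullptr;
      if (pos_.compare_exchange_weak(cur, cur + size,
                                     std::memory_order_acquire))
        return reinterpret_cast<void *>(cur);
    }
  }
  void *Refill(std::size_t size) {
    std::lock_guard<std::mutex> lock(mtx_);
    for (;;) {
      if (void *p = TryAlloc(size))  // another thread may have refilled
        return p;
      // Park concurrent fast paths, then install a fresh block.
      pos_.store(0, std::memory_order_relaxed);
      std::size_t block = size > kBlockSize ? size : kBlockSize;
      auto mem = reinterpret_cast<std::uintptr_t>(std::malloc(block));
      if (!mem)
        return nullptr;
      end_.store(mem + block, std::memory_order_release);
      pos_.store(mem, std::memory_order_release);
    }
  }
  static constexpr std::size_t kBlockSize = 64 * 1024;
  std::mutex mtx_;
  std::atomic<std::uintptr_t> pos_{0}, end_{0};
};
int main() {
  BumpArena arena;
  void *a = arena.Alloc(128);
  void *b = arena.Alloc(64 * 1024);  // forces a refill
  return (a && b) ? 0 : 1;
}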

View file

@ -235,6 +235,7 @@
#define SANITIZER_INTERCEPT_TIME SI_POSIX
#define SANITIZER_INTERCEPT_GLOB (SI_GLIBC || SI_SOLARIS)
#define SANITIZER_INTERCEPT_GLOB64 SI_GLIBC
#define SANITIZER_INTERCEPT_POSIX_SPAWN SI_POSIX
#define SANITIZER_INTERCEPT_WAIT SI_POSIX
#define SANITIZER_INTERCEPT_INET SI_POSIX
#define SANITIZER_INTERCEPT_PTHREAD_GETSCHEDPARAM SI_POSIX
@ -460,10 +461,13 @@
#define SANITIZER_INTERCEPT_SEND_SENDTO SI_POSIX
#define SANITIZER_INTERCEPT_EVENTFD_READ_WRITE SI_LINUX
#define SANITIZER_INTERCEPT_STAT \
(SI_FREEBSD || SI_MAC || SI_ANDROID || SI_NETBSD || SI_SOLARIS)
#define SI_STAT_LINUX (SI_LINUX && __GLIBC_PREREQ(2, 33))
#define SANITIZER_INTERCEPT_STAT \
(SI_FREEBSD || SI_MAC || SI_ANDROID || SI_NETBSD || SI_SOLARIS || \
SI_STAT_LINUX)
#define SANITIZER_INTERCEPT_LSTAT (SI_NETBSD || SI_FREEBSD)
#define SANITIZER_INTERCEPT___XSTAT (!SANITIZER_INTERCEPT_STAT && SI_POSIX)
#define SANITIZER_INTERCEPT___XSTAT \
(!SANITIZER_INTERCEPT_STAT && SI_POSIX) || SI_STAT_LINUX
#define SANITIZER_INTERCEPT___XSTAT64 SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT___LXSTAT SANITIZER_INTERCEPT___XSTAT
#define SANITIZER_INTERCEPT___LXSTAT64 SI_LINUX_NOT_ANDROID
@ -477,7 +481,7 @@
(SI_LINUX_NOT_ANDROID || SI_MAC || SI_FREEBSD || SI_NETBSD)
#define SANITIZER_INTERCEPT_MMAP SI_POSIX
#define SANITIZER_INTERCEPT_MMAP64 SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_MMAP64 SI_LINUX_NOT_ANDROID || SI_SOLARIS
#define SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO (SI_GLIBC || SI_ANDROID)
#define SANITIZER_INTERCEPT_MEMALIGN (!SI_FREEBSD && !SI_MAC && !SI_NETBSD)
#define SANITIZER_INTERCEPT___LIBC_MEMALIGN SI_GLIBC

View file

@ -26,10 +26,7 @@
// With old kernels (and even new kernels on powerpc) asm/stat.h uses types that
// are not defined anywhere in userspace headers. Fake them. This seems to work
// fine with newer headers, too. Beware that with <sys/stat.h>, struct stat
// takes the form of struct stat64 on 32-bit platforms if _FILE_OFFSET_BITS=64.
// Also, for some platforms (e.g. mips) there are additional members in the
// <sys/stat.h> struct stat:s.
// fine with newer headers, too.
#include <linux/posix_types.h>
# if defined(__x86_64__) || defined(__mips__) || defined(__hexagon__)
# include <sys/stat.h>

View file

@ -83,7 +83,7 @@ const unsigned struct_kernel_stat64_sz = 104;
#elif defined(__mips__)
const unsigned struct_kernel_stat_sz = SANITIZER_ANDROID
? FIRST_32_SECOND_64(104, 128)
: FIRST_32_SECOND_64(144, 216);
: FIRST_32_SECOND_64(160, 216);
const unsigned struct_kernel_stat64_sz = 104;
#elif defined(__s390__) && !defined(__s390x__)
const unsigned struct_kernel_stat_sz = 64;

View file

@ -145,28 +145,44 @@ void MemoryMappingLayout::DumpListOfModules(
}
}
void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size) {
void GetMemoryProfile(fill_profile_f cb, uptr *stats) {
char *smaps = nullptr;
uptr smaps_cap = 0;
uptr smaps_len = 0;
if (!ReadFileToBuffer("/proc/self/smaps", &smaps, &smaps_cap, &smaps_len))
return;
ParseUnixMemoryProfile(cb, stats, smaps, smaps_len);
UnmapOrDie(smaps, smaps_cap);
}
void ParseUnixMemoryProfile(fill_profile_f cb, uptr *stats, char *smaps,
uptr smaps_len) {
uptr start = 0;
bool file = false;
const char *pos = smaps;
while (pos < smaps + smaps_len) {
char *end = smaps + smaps_len;
if (smaps_len < 2)
return;
// The following parsing can crash on almost every line
// in the case of malformed/truncated input.
// Fixing that is hard because, e.g., ParseDecimal does not
// even accept the end of the buffer and assumes well-formed input.
// So instead we patch the end of the input a bit;
// this does not affect well-formed, complete inputs.
*--end = 0;
*--end = '\n';
while (pos < end) {
if (IsHex(pos[0])) {
start = ParseHex(&pos);
for (; *pos != '/' && *pos > '\n'; pos++) {}
file = *pos == '/';
} else if (internal_strncmp(pos, "Rss:", 4) == 0) {
while (!IsDecimal(*pos)) pos++;
while (pos < end && !IsDecimal(*pos)) pos++;
uptr rss = ParseDecimal(&pos) * 1024;
cb(start, rss, file, stats, stats_size);
cb(start, rss, file, stats);
}
while (*pos++ != '\n') {}
}
UnmapOrDie(smaps, smaps_cap);
}
} // namespace __sanitizer
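A stripped-down, illustrative counterpart of ParseUnixMemoryProfile above (ParseSmaps and its callback are invented names): it applies the same sentinel trick of overwriting the last two bytes of the buffer with a newline and a NUL so the per-line scanners cannot run past the end of truncated or malformed input.
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
// Calls cb(start, rss_bytes, is_file) for every mapping in an smaps-style
// buffer. The buffer must be writable: its last two bytes are patched to
// "\n\0" so the scanners below cannot run past the end.
template <typename Callback>
void ParseSmaps(char *smaps, std::size_t len, Callback cb) {
  if (len < 2) return;
  char *end = smaps + len;
  *--end = 0;
  *--end = '\n';
  unsigned long long start = 0;
  bool file = false;
  for (char *pos = smaps; pos < end;) {
    if (std::isxdigit((unsigned char)*pos)) {
      start = std::strtoull(pos, &pos, 16);        // mapping start address
      while (*pos != '/' && *pos > '\n') pos++;    // skip to pathname, if any
      file = (*pos == '/');
    } else if (std::strncmp(pos, "Rss:", 4) == 0) {
      while (pos < end && !std::isdigit((unsigned char)*pos)) pos++;
      unsigned long long rss = std::strtoull(pos, &pos, 10) * 1024;
      cb(start, rss, file);
    }
    while (*pos++ != '\n') {}                      // advance to the next line
  }
}
int main() {
  std::string s =
      "7f0000000000-7f0000021000 r-xp 00000000 08:01 42 /lib/x.so\n"
      "Rss:                  64 kB\n"
      "7f0000400000-7f0000500000 rw-p 00000000 00:00 0\n"
      "Rss:                 128 kB\n";
  ParseSmaps(&s[0], s.size(),
             [](unsigned long long start, unsigned long long rss, bool file) {
               std::printf("0x%llx rss=%llu file=%d\n", start, rss, file);
             });
}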

View file

@ -55,7 +55,15 @@ bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
internal_snprintf(proc_path, sizeof(proc_path), "/proc/self/path/%s",
xmapentry->pr_mapname);
internal_readlink(proc_path, segment->filename, segment->filename_size);
ssize_t sz = internal_readlink(proc_path, segment->filename,
segment->filename_size - 1);
// If readlink failed, the map is anonymous.
if (sz == -1) {
segment->filename[0] = '\0';
} else if ((size_t)sz < segment->filename_size)
// readlink doesn't NUL-terminate.
segment->filename[sz] = '\0';
}
data_.current += sizeof(prxmap_t);
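A minimal sketch of the readlink() pattern applied above: reserve one byte for the terminator, treat -1 as "no name", and NUL-terminate manually because readlink() does not. ReadLinkName is an illustrative helper, not part of the merge, and the /proc/self/exe example assumes Linux.
#include <unistd.h>
#include <cstdio>
// Fills 'buf' with the link target of 'path', or with an empty string if the
// link cannot be read (e.g. an anonymous mapping). readlink() neither fails
// on truncation nor NUL-terminates, so both cases are handled here.
static void ReadLinkName(const char *path, char *buf, size_t buf_size) {
  ssize_t sz = readlink(path, buf, buf_size - 1);
  if (sz < 0)
    buf[0] = '\0';
  else
    buf[sz] = '\0';
}
int main() {
  char name[256];
  ReadLinkName("/proc/self/exe", name, sizeof(name));
  std::printf("%s\n", name);
}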

View file

@ -14,93 +14,93 @@
#include "sanitizer_common.h"
#include "sanitizer_hash.h"
#include "sanitizer_persistent_allocator.h"
#include "sanitizer_stackdepotbase.h"
namespace __sanitizer {
static PersistentAllocator<uptr> traceAllocator;
struct StackDepotNode {
using hash_type = u32;
StackDepotNode *link;
u32 id;
using hash_type = u64;
hash_type stack_hash;
u32 size;
atomic_uint32_t tag_and_use_count; // tag : 12 high bits; use_count : 20;
uptr stack[1]; // [size]
u32 link;
static const u32 kTabSizeLog = SANITIZER_ANDROID ? 16 : 20;
static const u32 kUseCountBits = 20;
static const u32 kMaxUseCount = 1 << kUseCountBits;
static const u32 kUseCountMask = (1 << kUseCountBits) - 1;
static const u32 kStackSizeBits = 16;
typedef StackTrace args_type;
bool eq(hash_type hash, const args_type &args) const {
u32 tag =
atomic_load(&tag_and_use_count, memory_order_relaxed) >> kUseCountBits;
if (stack_hash != hash || args.size != size || args.tag != tag)
return false;
uptr i = 0;
for (; i < size; i++) {
if (stack[i] != args.trace[i]) return false;
}
return true;
}
static uptr storage_size(const args_type &args) {
return sizeof(StackDepotNode) + (args.size - 1) * sizeof(uptr);
return hash == stack_hash;
}
static uptr allocated();
static hash_type hash(const args_type &args) {
MurMur2HashBuilder H(args.size * sizeof(uptr));
MurMur2Hash64Builder H(args.size * sizeof(uptr));
for (uptr i = 0; i < args.size; i++) H.add(args.trace[i]);
H.add(args.tag);
return H.get();
}
static bool is_valid(const args_type &args) {
return args.size > 0 && args.trace;
}
void store(const args_type &args, hash_type hash) {
CHECK_EQ(args.tag & (~kUseCountMask >> kUseCountBits), args.tag);
atomic_store(&tag_and_use_count, args.tag << kUseCountBits,
memory_order_relaxed);
stack_hash = hash;
size = args.size;
internal_memcpy(stack, args.trace, size * sizeof(uptr));
}
args_type load() const {
u32 tag =
atomic_load(&tag_and_use_count, memory_order_relaxed) >> kUseCountBits;
return args_type(&stack[0], size, tag);
}
StackDepotHandle get_handle() { return StackDepotHandle(this); }
void store(u32 id, const args_type &args, hash_type hash);
args_type load(u32 id) const;
static StackDepotHandle get_handle(u32 id);
typedef StackDepotHandle handle_type;
};
COMPILER_CHECK(StackDepotNode::kMaxUseCount >= (u32)kStackDepotMaxUseCount);
u32 StackDepotHandle::id() { return node_->id; }
int StackDepotHandle::use_count() {
return atomic_load(&node_->tag_and_use_count, memory_order_relaxed) &
StackDepotNode::kUseCountMask;
}
void StackDepotHandle::inc_use_count_unsafe() {
u32 prev =
atomic_fetch_add(&node_->tag_and_use_count, 1, memory_order_relaxed) &
StackDepotNode::kUseCountMask;
CHECK_LT(prev + 1, StackDepotNode::kMaxUseCount);
}
// FIXME(dvyukov): this single reserved bit is used in TSan.
typedef StackDepotBase<StackDepotNode, 1, StackDepotNode::kTabSizeLog>
StackDepot;
static StackDepot theDepot;
// Keep rarely accessed stack traces out of frequently accessed nodes to improve
// caching efficiency.
static TwoLevelMap<uptr *, StackDepot::kNodesSize1, StackDepot::kNodesSize2>
tracePtrs;
// Keep mutable data out of frequently accessed nodes to improve caching
// efficiency.
static TwoLevelMap<atomic_uint32_t, StackDepot::kNodesSize1,
StackDepot::kNodesSize2>
useCounts;
int StackDepotHandle::use_count() const {
return atomic_load_relaxed(&useCounts[id_]);
}
void StackDepotHandle::inc_use_count_unsafe() {
atomic_fetch_add(&useCounts[id_], 1, memory_order_relaxed);
}
uptr StackDepotNode::allocated() {
return traceAllocator.allocated() + tracePtrs.MemoryUsage() +
useCounts.MemoryUsage();
}
void StackDepotNode::store(u32 id, const args_type &args, hash_type hash) {
stack_hash = hash;
uptr *stack_trace = traceAllocator.alloc(args.size + 1);
CHECK_LT(args.size, 1 << kStackSizeBits);
*stack_trace = args.size + (args.tag << kStackSizeBits);
internal_memcpy(stack_trace + 1, args.trace, args.size * sizeof(uptr));
tracePtrs[id] = stack_trace;
}
StackDepotNode::args_type StackDepotNode::load(u32 id) const {
const uptr *stack_trace = tracePtrs[id];
if (!stack_trace)
return {};
uptr size = *stack_trace & ((1 << kStackSizeBits) - 1);
uptr tag = *stack_trace >> kStackSizeBits;
return args_type(stack_trace + 1, size, tag);
}
StackDepotStats StackDepotGetStats() { return theDepot.GetStats(); }
u32 StackDepotPut(StackTrace stack) {
StackDepotHandle h = theDepot.Put(stack);
return h.valid() ? h.id() : 0;
}
u32 StackDepotPut(StackTrace stack) { return theDepot.Put(stack); }
StackDepotHandle StackDepotPut_WithHandle(StackTrace stack) {
return theDepot.Put(stack);
return StackDepotNode::get_handle(theDepot.Put(stack));
}
StackTrace StackDepotGet(u32 id) {
@ -121,37 +121,14 @@ void StackDepotPrintAll() {
#endif
}
bool StackDepotReverseMap::IdDescPair::IdComparator(
const StackDepotReverseMap::IdDescPair &a,
const StackDepotReverseMap::IdDescPair &b) {
return a.id < b.id;
StackDepotHandle StackDepotNode::get_handle(u32 id) {
return StackDepotHandle(&theDepot.nodes[id], id);
}
void StackDepotReverseMap::Init() const {
if (LIKELY(map_.capacity()))
return;
map_.reserve(StackDepotGetStats().n_uniq_ids + 100);
for (int idx = 0; idx < StackDepot::kTabSize; idx++) {
atomic_uintptr_t *p = &theDepot.tab[idx];
uptr v = atomic_load(p, memory_order_consume);
StackDepotNode *s = (StackDepotNode*)(v & ~1);
for (; s; s = s->link) {
IdDescPair pair = {s->id, s};
map_.push_back(pair);
}
}
Sort(map_.data(), map_.size(), &IdDescPair::IdComparator);
}
StackTrace StackDepotReverseMap::Get(u32 id) const {
Init();
if (!map_.size())
return StackTrace();
IdDescPair pair = {id, nullptr};
uptr idx = InternalLowerBound(map_, pair, IdDescPair::IdComparator);
if (idx > map_.size() || map_[idx].id != id)
return StackTrace();
return map_[idx].desc->load();
void StackDepotTestOnlyUnmap() {
theDepot.TestOnlyUnmap();
tracePtrs.TestOnlyUnmap();
traceAllocator.TestOnlyUnmap();
}
} // namespace __sanitizer
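Illustrative only: the header-word packing used by StackDepotNode::store()/load() above, with the trace length in the low kStackSizeBits bits and the tag in the bits above them.
#include <cassert>
#include <cstdint>
constexpr unsigned kStackSizeBits = 16;
constexpr std::uintptr_t kSizeMask = (1u << kStackSizeBits) - 1;
// Pack trace length and tag into a single header word.
constexpr std::uintptr_t PackHeader(std::uintptr_t size, std::uintptr_t tag) {
  return size + (tag << kStackSizeBits);
}
int main() {
  std::uintptr_t header = PackHeader(42, 7);
  std::uintptr_t size = header & kSizeMask;       // low 16 bits
  std::uintptr_t tag = header >> kStackSizeBits;  // remaining high bits
  assert(size == 42 && tag == 7);
}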

View file

@ -22,12 +22,12 @@ namespace __sanitizer {
// StackDepot efficiently stores huge amounts of stack traces.
struct StackDepotNode;
struct StackDepotHandle {
StackDepotNode *node_;
StackDepotHandle() : node_(nullptr) {}
explicit StackDepotHandle(StackDepotNode *node) : node_(node) {}
bool valid() { return node_; }
u32 id();
int use_count();
StackDepotNode *node_ = nullptr;
u32 id_ = 0;
StackDepotHandle(StackDepotNode *node, u32 id) : node_(node), id_(id) {}
bool valid() const { return node_; }
u32 id() const { return id_; }
int use_count() const;
void inc_use_count_unsafe();
};
@ -43,31 +43,7 @@ void StackDepotLockAll();
void StackDepotUnlockAll();
void StackDepotPrintAll();
// Instantiating this class creates a snapshot of StackDepot which can be
// efficiently queried with StackDepotGet(). You can use it concurrently with
// StackDepot, but the snapshot is only guaranteed to contain those stack traces
// which were stored before it was instantiated.
class StackDepotReverseMap {
public:
StackDepotReverseMap() = default;
StackTrace Get(u32 id) const;
private:
struct IdDescPair {
u32 id;
StackDepotNode *desc;
static bool IdComparator(const IdDescPair &a, const IdDescPair &b);
};
void Init() const;
mutable InternalMmapVector<IdDescPair> map_;
// Disallow evil constructors.
StackDepotReverseMap(const StackDepotReverseMap&);
void operator=(const StackDepotReverseMap&);
};
void StackDepotTestOnlyUnmap();
} // namespace __sanitizer

View file

@ -16,72 +16,87 @@
#include <stdio.h>
#include "sanitizer_atomic.h"
#include "sanitizer_flat_map.h"
#include "sanitizer_internal_defs.h"
#include "sanitizer_mutex.h"
#include "sanitizer_persistent_allocator.h"
namespace __sanitizer {
template <class Node, int kReservedBits, int kTabSizeLog>
class StackDepotBase {
static constexpr u32 kIdSizeLog =
sizeof(u32) * 8 - Max(kReservedBits, 1 /* At least 1 bit for locking. */);
static constexpr u32 kNodesSize1Log = kIdSizeLog / 2;
static constexpr u32 kNodesSize2Log = kIdSizeLog - kNodesSize1Log;
static constexpr int kTabSize = 1 << kTabSizeLog; // Hash table size.
static constexpr u32 kUnlockMask = (1ull << kIdSizeLog) - 1;
static constexpr u32 kLockMask = ~kUnlockMask;
public:
typedef typename Node::args_type args_type;
typedef typename Node::handle_type handle_type;
typedef typename Node::hash_type hash_type;
static constexpr u64 kNodesSize1 = 1ull << kNodesSize1Log;
static constexpr u64 kNodesSize2 = 1ull << kNodesSize2Log;
// Maps a stack trace to a unique id.
handle_type Put(args_type args, bool *inserted = nullptr);
u32 Put(args_type args, bool *inserted = nullptr);
// Retrieves a stored stack trace by the id.
args_type Get(u32 id);
StackDepotStats GetStats() const { return stats; }
StackDepotStats GetStats() const {
return {
atomic_load_relaxed(&n_uniq_ids),
nodes.MemoryUsage() + Node::allocated(),
};
}
void LockAll();
void UnlockAll();
void PrintAll();
void TestOnlyUnmap() {
nodes.TestOnlyUnmap();
internal_memset(this, 0, sizeof(*this));
}
private:
static Node *find(Node *s, args_type args, hash_type hash);
static Node *lock(atomic_uintptr_t *p);
static void unlock(atomic_uintptr_t *p, Node *s);
friend Node;
u32 find(u32 s, args_type args, hash_type hash) const;
static u32 lock(atomic_uint32_t *p);
static void unlock(atomic_uint32_t *p, u32 s);
atomic_uint32_t tab[kTabSize]; // Hash table of Node's.
static const int kTabSize = 1 << kTabSizeLog; // Hash table size.
static const int kPartBits = 8;
static const int kPartShift = sizeof(u32) * 8 - kPartBits - kReservedBits;
static const int kPartCount =
1 << kPartBits; // Number of subparts in the table.
static const int kPartSize = kTabSize / kPartCount;
static const int kMaxId = 1 << kPartShift;
atomic_uint32_t n_uniq_ids;
atomic_uintptr_t tab[kTabSize]; // Hash table of Node's.
atomic_uint32_t seq[kPartCount]; // Unique id generators.
StackDepotStats stats;
TwoLevelMap<Node, kNodesSize1, kNodesSize2> nodes;
friend class StackDepotReverseMap;
};
template <class Node, int kReservedBits, int kTabSizeLog>
Node *StackDepotBase<Node, kReservedBits, kTabSizeLog>::find(Node *s,
args_type args,
hash_type hash) {
u32 StackDepotBase<Node, kReservedBits, kTabSizeLog>::find(
u32 s, args_type args, hash_type hash) const {
// Searches linked list s for the stack, returns its id.
for (; s; s = s->link) {
if (s->eq(hash, args)) {
for (; s;) {
const Node &node = nodes[s];
if (node.eq(hash, args))
return s;
}
s = node.link;
}
return nullptr;
return 0;
}
template <class Node, int kReservedBits, int kTabSizeLog>
Node *StackDepotBase<Node, kReservedBits, kTabSizeLog>::lock(
atomic_uintptr_t *p) {
u32 StackDepotBase<Node, kReservedBits, kTabSizeLog>::lock(atomic_uint32_t *p) {
// Uses the reserved high bits (kLockMask) of the bucket head as a mutex.
for (int i = 0;; i++) {
uptr cmp = atomic_load(p, memory_order_relaxed);
if ((cmp & 1) == 0 &&
atomic_compare_exchange_weak(p, &cmp, cmp | 1, memory_order_acquire))
return (Node *)cmp;
u32 cmp = atomic_load(p, memory_order_relaxed);
if ((cmp & kLockMask) == 0 &&
atomic_compare_exchange_weak(p, &cmp, cmp | kLockMask,
memory_order_acquire))
return cmp;
if (i < 10)
proc_yield(10);
else
@ -91,73 +106,57 @@ Node *StackDepotBase<Node, kReservedBits, kTabSizeLog>::lock(
template <class Node, int kReservedBits, int kTabSizeLog>
void StackDepotBase<Node, kReservedBits, kTabSizeLog>::unlock(
atomic_uintptr_t *p, Node *s) {
DCHECK_EQ((uptr)s & 1, 0);
atomic_store(p, (uptr)s, memory_order_release);
atomic_uint32_t *p, u32 s) {
DCHECK_EQ(s & kLockMask, 0);
atomic_store(p, s, memory_order_release);
}
template <class Node, int kReservedBits, int kTabSizeLog>
typename StackDepotBase<Node, kReservedBits, kTabSizeLog>::handle_type
StackDepotBase<Node, kReservedBits, kTabSizeLog>::Put(args_type args,
bool *inserted) {
if (inserted) *inserted = false;
if (!Node::is_valid(args)) return handle_type();
u32 StackDepotBase<Node, kReservedBits, kTabSizeLog>::Put(args_type args,
bool *inserted) {
if (inserted)
*inserted = false;
if (!LIKELY(Node::is_valid(args)))
return 0;
hash_type h = Node::hash(args);
atomic_uintptr_t *p = &tab[h % kTabSize];
uptr v = atomic_load(p, memory_order_consume);
Node *s = (Node *)(v & ~1);
atomic_uint32_t *p = &tab[h % kTabSize];
u32 v = atomic_load(p, memory_order_consume);
u32 s = v & kUnlockMask;
// First, try to find the existing stack.
Node *node = find(s, args, h);
if (node) return node->get_handle();
u32 node = find(s, args, h);
if (LIKELY(node))
return node;
// If failed, lock, retry and insert new.
Node *s2 = lock(p);
u32 s2 = lock(p);
if (s2 != s) {
node = find(s2, args, h);
if (node) {
unlock(p, s2);
return node->get_handle();
return node;
}
}
uptr part = (h % kTabSize) / kPartSize;
u32 id = atomic_fetch_add(&seq[part], 1, memory_order_relaxed) + 1;
stats.n_uniq_ids++;
CHECK_LT(id, kMaxId);
id |= part << kPartShift;
CHECK_NE(id, 0);
CHECK_EQ(id & (((u32)-1) >> kReservedBits), id);
uptr memsz = Node::storage_size(args);
s = (Node *)PersistentAlloc(memsz);
stats.allocated += memsz;
s->id = id;
s->store(args, h);
s->link = s2;
s = atomic_fetch_add(&n_uniq_ids, 1, memory_order_relaxed) + 1;
CHECK_EQ(s & kUnlockMask, s);
CHECK_EQ(s & (((u32)-1) >> kReservedBits), s);
Node &new_node = nodes[s];
new_node.store(s, args, h);
new_node.link = s2;
unlock(p, s);
if (inserted) *inserted = true;
return s->get_handle();
return s;
}
template <class Node, int kReservedBits, int kTabSizeLog>
typename StackDepotBase<Node, kReservedBits, kTabSizeLog>::args_type
StackDepotBase<Node, kReservedBits, kTabSizeLog>::Get(u32 id) {
if (id == 0) {
if (id == 0)
return args_type();
}
CHECK_EQ(id & (((u32)-1) >> kReservedBits), id);
// High kPartBits contain part id, so we need to scan at most kPartSize lists.
uptr part = id >> kPartShift;
for (int i = 0; i != kPartSize; i++) {
uptr idx = part * kPartSize + i;
CHECK_LT(idx, kTabSize);
atomic_uintptr_t *p = &tab[idx];
uptr v = atomic_load(p, memory_order_consume);
Node *s = (Node *)(v & ~1);
for (; s; s = s->link) {
if (s->id == id) {
return s->load();
}
}
}
return args_type();
if (!nodes.contains(id))
return args_type();
const Node &node = nodes[id];
return node.load(id);
}
template <class Node, int kReservedBits, int kTabSizeLog>
@ -170,24 +169,23 @@ void StackDepotBase<Node, kReservedBits, kTabSizeLog>::LockAll() {
template <class Node, int kReservedBits, int kTabSizeLog>
void StackDepotBase<Node, kReservedBits, kTabSizeLog>::UnlockAll() {
for (int i = 0; i < kTabSize; ++i) {
atomic_uintptr_t *p = &tab[i];
atomic_uint32_t *p = &tab[i];
uptr s = atomic_load(p, memory_order_relaxed);
unlock(p, (Node *)(s & ~1UL));
unlock(p, s & kUnlockMask);
}
}
template <class Node, int kReservedBits, int kTabSizeLog>
void StackDepotBase<Node, kReservedBits, kTabSizeLog>::PrintAll() {
for (int i = 0; i < kTabSize; ++i) {
atomic_uintptr_t *p = &tab[i];
lock(p);
uptr v = atomic_load(p, memory_order_relaxed);
Node *s = (Node *)(v & ~1UL);
for (; s; s = s->link) {
Printf("Stack for id %u:\n", s->id);
s->load().Print();
atomic_uint32_t *p = &tab[i];
u32 s = atomic_load(p, memory_order_consume) & kUnlockMask;
for (; s;) {
const Node &node = nodes[s];
Printf("Stack for id %u:\n", s);
node.load(s).Print();
s = node.link;
}
unlock(p, s);
}
}
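A self-contained sketch of the locking scheme used by StackDepotBase::lock()/unlock() above: each hash-table bucket holds a 32-bit id whose reserved high bits double as a spin lock, so lock() returns the unlocked value and unlock() publishes a new head in the same store. One reserved bit is used here for simplicity; Lock and Unlock are illustrative names.
#include <atomic>
#include <cstdint>
#include <thread>
constexpr std::uint32_t kLockMask = 1u << 31;       // reserved high bit
constexpr std::uint32_t kUnlockMask = kLockMask - 1;
// Spin until the lock bit can be set; return the protected value (the id).
std::uint32_t Lock(std::atomic<std::uint32_t> *p) {
  for (;;) {
    std::uint32_t cmp = p->load(std::memory_order_relaxed);
    if ((cmp & kLockMask) == 0 &&
        p->compare_exchange_weak(cmp, cmp | kLockMask,
                                 std::memory_order_acquire))
      return cmp;
    std::this_thread::yield();
  }
}
// Store a new (unlocked) value, releasing the lock in the same operation.
void Unlock(std::atomic<std::uint32_t> *p, std::uint32_t value) {
  p->store(value & kUnlockMask, std::memory_order_release);
}
int main() {
  std::atomic<std::uint32_t> bucket{0};
  std::uint32_t head = Lock(&bucket);  // head == 0, bucket now locked
  Unlock(&bucket, head + 1);           // publish id 1 and release the lock
}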

View file

@ -24,7 +24,7 @@ uptr StackTrace::GetNextInstructionPc(uptr pc) {
return pc + 8;
#elif defined(__powerpc__) || defined(__arm__) || defined(__aarch64__) || \
defined(__hexagon__)
return pc + 4;
return STRIP_PAC_PC((void *)pc) + 4;
#elif SANITIZER_RISCV64
// Current check order is 4 -> 2 -> 6 -> 8
u8 InsnByte = *(u8 *)(pc);
@ -86,8 +86,8 @@ static inline uhwptr *GetCanonicFrame(uptr bp,
// Nope, this does not look right either. This means the frame after next does
// not have a valid frame pointer, but we can still extract the caller PC.
// Unfortunately, there is no way to decide between GCC and LLVM frame
// layouts. Assume GCC.
return bp_prev - 1;
// layouts. Assume LLVM.
return bp_prev;
#else
return (uhwptr*)bp;
#endif
@ -110,21 +110,14 @@ void BufferedStackTrace::UnwindFast(uptr pc, uptr bp, uptr stack_top,
IsAligned((uptr)frame, sizeof(*frame)) &&
size < max_depth) {
#ifdef __powerpc__
// PowerPC ABIs specify that the return address is saved on the
// *caller's* stack frame. Thus we must dereference the back chain
// to find the caller frame before extracting it.
// PowerPC ABIs specify that the return address is saved at offset
// 16 of the *caller's* stack frame. Thus we must dereference the
// back chain to find the caller frame before extracting it.
uhwptr *caller_frame = (uhwptr*)frame[0];
if (!IsValidFrame((uptr)caller_frame, stack_top, bottom) ||
!IsAligned((uptr)caller_frame, sizeof(uhwptr)))
break;
// For most ABIs the offset where the return address is saved is two
// register sizes. The exception is the SVR4 ABI, which uses an
// offset of only one register size.
#ifdef _CALL_SYSV
uhwptr pc1 = caller_frame[1];
#else
uhwptr pc1 = caller_frame[2];
#endif
#elif defined(__s390__)
uhwptr pc1 = frame[14];
#elif defined(__riscv)

View file

@ -88,11 +88,17 @@ void ReportErrorSummary(const char *error_type, const StackTrace *stack,
#endif
}
void ReportMmapWriteExec(int prot) {
void ReportMmapWriteExec(int prot, int flags) {
#if SANITIZER_POSIX && (!SANITIZER_GO && !SANITIZER_ANDROID)
if ((prot & (PROT_WRITE | PROT_EXEC)) != (PROT_WRITE | PROT_EXEC))
int pflags = (PROT_WRITE | PROT_EXEC);
if ((prot & pflags) != pflags)
return;
# if SANITIZER_MAC && defined(MAP_JIT)
if ((flags & MAP_JIT) == MAP_JIT)
return;
# endif
ScopedErrorReportLock l;
SanitizerCommonDecorator d;

View file

@ -44,7 +44,7 @@ static atomic_uintptr_t number_of_live_dtls;
static const uptr kDestroyedThread = -1;
static void DTLS_Deallocate(DTLS::DTVBlock *block) {
VReport(2, "__tls_get_addr: DTLS_Deallocate %p\n", block);
VReport(2, "__tls_get_addr: DTLS_Deallocate %p\n", (void *)block);
UnmapOrDie(block, sizeof(DTLS::DTVBlock));
atomic_fetch_sub(&number_of_live_dtls, 1, memory_order_relaxed);
}
@ -66,12 +66,13 @@ static DTLS::DTVBlock *DTLS_NextBlock(atomic_uintptr_t *cur) {
}
uptr num_live_dtls =
atomic_fetch_add(&number_of_live_dtls, 1, memory_order_relaxed);
VReport(2, "__tls_get_addr: DTLS_NextBlock %p %zd\n", &dtls, num_live_dtls);
VReport(2, "__tls_get_addr: DTLS_NextBlock %p %zd\n", (void *)&dtls,
num_live_dtls);
return new_dtv;
}
static DTLS::DTV *DTLS_Find(uptr id) {
VReport(2, "__tls_get_addr: DTLS_Find %p %zd\n", &dtls, id);
VReport(2, "__tls_get_addr: DTLS_Find %p %zd\n", (void *)&dtls, id);
static constexpr uptr kPerBlock = ARRAY_SIZE(DTLS::DTVBlock::dtvs);
DTLS::DTVBlock *cur = DTLS_NextBlock(&dtls.dtv_block);
if (!cur)
@ -82,7 +83,7 @@ static DTLS::DTV *DTLS_Find(uptr id) {
void DTLS_Destroy() {
if (!common_flags()->intercept_tls_get_addr) return;
VReport(2, "__tls_get_addr: DTLS_Destroy %p\n", &dtls);
VReport(2, "__tls_get_addr: DTLS_Destroy %p\n", (void *)&dtls);
DTLS::DTVBlock *block = (DTLS::DTVBlock *)atomic_exchange(
&dtls.dtv_block, kDestroyedThread, memory_order_release);
while (block) {
@ -120,7 +121,7 @@ DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res,
VReport(2,
"__tls_get_addr: %p {0x%zx,0x%zx} => %p; tls_beg: 0x%zx; sp: %p "
"num_live_dtls %zd\n",
arg, arg->dso_id, arg->offset, res, tls_beg, &tls_beg,
(void *)arg, arg->dso_id, arg->offset, res, tls_beg, (void *)&tls_beg,
atomic_load(&number_of_live_dtls, memory_order_relaxed));
if (dtls.last_memalign_ptr == tls_beg) {
tls_size = dtls.last_memalign_size;

View file

@ -1113,7 +1113,7 @@ bool IsProcessRunning(pid_t pid) {
int WaitForProcess(pid_t pid) { return -1; }
// FIXME implement on this platform.
void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size) { }
void GetMemoryProfile(fill_profile_f cb, uptr *stats) {}
void CheckNoDeepBind(const char *filename, int flag) {
// Do nothing.

View file

@ -38,19 +38,31 @@ inline bool in_symbolizer() {
} // namespace __tsan
#define SCOPED_INTERCEPTOR_RAW(func, ...) \
ThreadState *thr = cur_thread_init(); \
const uptr caller_pc = GET_CALLER_PC(); \
ScopedInterceptor si(thr, #func, caller_pc); \
const uptr pc = GET_CURRENT_PC(); \
(void)pc;
#define SCOPED_INTERCEPTOR_RAW(func, ...) \
ThreadState *thr = cur_thread_init(); \
ScopedInterceptor si(thr, #func, GET_CALLER_PC()); \
UNUSED const uptr pc = GET_CURRENT_PC();
#ifdef __powerpc64__
// Debugging of crashes on powerpc after commit:
// c80604f7a3 ("tsan: remove real func check from interceptors")
// Somehow replacing if with DCHECK leads to strange failures in:
// SanitizerCommon-tsan-powerpc64le-Linux :: Linux/ptrace.cpp
// https://lab.llvm.org/buildbot/#/builders/105
// https://lab.llvm.org/buildbot/#/builders/121
// https://lab.llvm.org/buildbot/#/builders/57
# define CHECK_REAL_FUNC(func) \
if (REAL(func) == 0) { \
Report("FATAL: ThreadSanitizer: failed to intercept %s\n", #func); \
Die(); \
}
#else
# define CHECK_REAL_FUNC(func) DCHECK(REAL(func))
#endif
#define SCOPED_TSAN_INTERCEPTOR(func, ...) \
SCOPED_INTERCEPTOR_RAW(func, __VA_ARGS__); \
if (REAL(func) == 0) { \
Report("FATAL: ThreadSanitizer: failed to intercept %s\n", #func); \
Die(); \
} \
CHECK_REAL_FUNC(func); \
if (!thr->is_inited || thr->ignore_interceptors || thr->in_ignored_lib) \
return REAL(func)(__VA_ARGS__);

View file

@ -10,6 +10,8 @@
//
//===----------------------------------------------------------------------===//
#include "tsan_mutexset.h"
#include "sanitizer_common/sanitizer_placement_new.h"
#include "tsan_rtl.h"
namespace __tsan {
@ -124,4 +126,7 @@ MutexSet::Desc MutexSet::Get(uptr i) const {
return descs_[i];
}
DynamicMutexSet::DynamicMutexSet() : ptr_(New<MutexSet>()) {}
DynamicMutexSet::~DynamicMutexSet() { DestroyAndFree(ptr_); }
} // namespace __tsan

View file

@ -59,6 +59,24 @@ class MutexSet {
#endif
};
// MutexSet is too large to live on the stack.
// DynamicMutexSet can be used to create local MutexSets.
class DynamicMutexSet {
public:
DynamicMutexSet();
~DynamicMutexSet();
MutexSet* operator->() { return ptr_; }
operator MutexSet*() { return ptr_; }
DynamicMutexSet(const DynamicMutexSet&) = delete;
DynamicMutexSet& operator=(const DynamicMutexSet&) = delete;
private:
MutexSet* ptr_;
#if SANITIZER_GO
MutexSet set_;
#endif
};
// Go does not have mutexes, so do not spend memory and time.
// (Go sync.Mutex is actually a semaphore -- can be unlocked
// in a different goroutine).
@ -71,6 +89,8 @@ void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {}
void MutexSet::DelAddr(uptr addr, bool destroy) {}
uptr MutexSet::Size() const { return 0; }
MutexSet::Desc MutexSet::Get(uptr i) const { return Desc(); }
DynamicMutexSet::DynamicMutexSet() : ptr_(&set_) {}
DynamicMutexSet::~DynamicMutexSet() {}
#endif
} // namespace __tsan

View file

@ -906,7 +906,7 @@ struct RestoreAddrImpl {
// 3 bits of the compressed addr match that of the app range. If yes, we
// assume that the compressed address comes from that range and restore the
// missing top bits to match the app range address.
static constexpr uptr ranges[] = {
const uptr ranges[] = {
Mapping::kLoAppMemBeg, Mapping::kLoAppMemEnd, Mapping::kMidAppMemBeg,
Mapping::kMidAppMemEnd, Mapping::kHiAppMemBeg, Mapping::kHiAppMemEnd,
Mapping::kHeapMemBeg, Mapping::kHeapMemEnd,

View file

@ -100,8 +100,7 @@ enum {
MemCount,
};
void FillProfileCallback(uptr p, uptr rss, bool file,
uptr *mem, uptr stats_size) {
void FillProfileCallback(uptr p, uptr rss, bool file, uptr *mem) {
mem[MemTotal] += rss;
if (p >= ShadowBeg() && p < ShadowEnd())
mem[MemShadow] += rss;
@ -122,7 +121,7 @@ void FillProfileCallback(uptr p, uptr rss, bool file,
void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
uptr mem[MemCount];
internal_memset(mem, 0, sizeof(mem));
GetMemoryProfile(FillProfileCallback, mem, MemCount);
GetMemoryProfile(FillProfileCallback, mem);
auto meta = ctx->metamap.GetMemoryStats();
StackDepotStats stacks = StackDepotGetStats();
uptr nthread, nlive;

View file

@ -240,7 +240,7 @@ void InitializePlatformEarly() {
uptr max_vm = GetMaxUserVirtualAddress() + 1;
if (max_vm != HiAppMemEnd()) {
Printf("ThreadSanitizer: unsupported vm address limit %p, expected %p.\n",
max_vm, HiAppMemEnd());
(void *)max_vm, (void *)HiAppMemEnd());
Die();
}
#endif

View file

@ -567,123 +567,6 @@ StackID CurrentStackId(ThreadState *thr, uptr pc) {
namespace v3 {
ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState *thr, uptr pc,
uptr addr, uptr size,
AccessType typ) {
DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
if (!kCollectHistory)
return true;
EventAccess *ev;
if (UNLIKELY(!TraceAcquire(thr, &ev)))
return false;
u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
thr->trace_prev_pc = pc;
if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
ev->is_access = 1;
ev->is_read = !!(typ & kAccessRead);
ev->is_atomic = !!(typ & kAccessAtomic);
ev->size_log = size_log;
ev->pc_delta = pc_delta;
DCHECK_EQ(ev->pc_delta, pc_delta);
ev->addr = CompressAddr(addr);
TraceRelease(thr, ev);
return true;
}
auto *evex = reinterpret_cast<EventAccessExt *>(ev);
evex->is_access = 0;
evex->is_func = 0;
evex->type = EventType::kAccessExt;
evex->is_read = !!(typ & kAccessRead);
evex->is_atomic = !!(typ & kAccessAtomic);
evex->size_log = size_log;
evex->addr = CompressAddr(addr);
evex->pc = pc;
TraceRelease(thr, evex);
return true;
}
ALWAYS_INLINE USED bool TryTraceMemoryAccessRange(ThreadState *thr, uptr pc,
uptr addr, uptr size,
AccessType typ) {
if (!kCollectHistory)
return true;
EventAccessRange *ev;
if (UNLIKELY(!TraceAcquire(thr, &ev)))
return false;
thr->trace_prev_pc = pc;
ev->is_access = 0;
ev->is_func = 0;
ev->type = EventType::kAccessRange;
ev->is_read = !!(typ & kAccessRead);
ev->is_free = !!(typ & kAccessFree);
ev->size_lo = size;
ev->pc = CompressAddr(pc);
ev->addr = CompressAddr(addr);
ev->size_hi = size >> EventAccessRange::kSizeLoBits;
TraceRelease(thr, ev);
return true;
}
void TraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
AccessType typ) {
if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
return;
TraceSwitchPart(thr);
UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
DCHECK(res);
}
void TraceFunc(ThreadState *thr, uptr pc) {
if (LIKELY(TryTraceFunc(thr, pc)))
return;
TraceSwitchPart(thr);
UNUSED bool res = TryTraceFunc(thr, pc);
DCHECK(res);
}
void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr,
StackID stk) {
DCHECK(type == EventType::kLock || type == EventType::kRLock);
if (!kCollectHistory)
return;
EventLock ev;
ev.is_access = 0;
ev.is_func = 0;
ev.type = type;
ev.pc = CompressAddr(pc);
ev.stack_lo = stk;
ev.stack_hi = stk >> EventLock::kStackIDLoBits;
ev._ = 0;
ev.addr = CompressAddr(addr);
TraceEvent(thr, ev);
}
void TraceMutexUnlock(ThreadState *thr, uptr addr) {
if (!kCollectHistory)
return;
EventUnlock ev;
ev.is_access = 0;
ev.is_func = 0;
ev.type = EventType::kUnlock;
ev._ = 0;
ev.addr = CompressAddr(addr);
TraceEvent(thr, ev);
}
void TraceTime(ThreadState *thr) {
if (!kCollectHistory)
return;
EventTime ev;
ev.is_access = 0;
ev.is_func = 0;
ev.type = EventType::kTime;
ev.sid = static_cast<u64>(thr->sid);
ev.epoch = static_cast<u64>(thr->epoch);
ev._ = 0;
TraceEvent(thr, ev);
}
NOINLINE
void TraceSwitchPart(ThreadState *thr) {
Trace *trace = &thr->tctx->trace;
@ -789,427 +672,6 @@ extern "C" void __tsan_report_race() {
}
#endif
ALWAYS_INLINE
Shadow LoadShadow(u64 *p) {
u64 raw = atomic_load((atomic_uint64_t*)p, memory_order_relaxed);
return Shadow(raw);
}
ALWAYS_INLINE
void StoreShadow(u64 *sp, u64 s) {
atomic_store((atomic_uint64_t*)sp, s, memory_order_relaxed);
}
ALWAYS_INLINE
void StoreIfNotYetStored(u64 *sp, u64 *s) {
StoreShadow(sp, *s);
*s = 0;
}
ALWAYS_INLINE
void HandleRace(ThreadState *thr, u64 *shadow_mem,
Shadow cur, Shadow old) {
thr->racy_state[0] = cur.raw();
thr->racy_state[1] = old.raw();
thr->racy_shadow_addr = shadow_mem;
#if !SANITIZER_GO
HACKY_CALL(__tsan_report_race);
#else
ReportRace(thr);
#endif
}
static inline bool HappensBefore(Shadow old, ThreadState *thr) {
return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
}
ALWAYS_INLINE
void MemoryAccessImpl1(ThreadState *thr, uptr addr,
int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
u64 *shadow_mem, Shadow cur) {
// This potentially can live in an MMX/SSE scratch register.
// The required intrinsics are:
// __m128i _mm_move_epi64(__m128i*);
// _mm_storel_epi64(u64*, __m128i);
u64 store_word = cur.raw();
bool stored = false;
// scan all the shadow values and dispatch to 4 categories:
// same, replace, candidate and race (see comments below).
// we consider only 3 cases regarding access sizes:
// equal, intersect and not intersect. initially I considered
// larger and smaller as well; it allowed replacing some
// 'candidates' with 'same' or 'replace', but I think
// it's just not worth it (performance- and complexity-wise).
Shadow old(0);
// In release mode we manually unroll the loop,
// because empirically gcc generates better code this way.
// However, we can't afford unrolling in debug mode, because the function
// consumes almost 4K of stack. Gtest gives only 4K of stack to death test
// threads, which is not enough for the unrolled loop.
#if SANITIZER_DEBUG
for (int idx = 0; idx < 4; idx++) {
# include "tsan_update_shadow_word.inc"
}
#else
int idx = 0;
# include "tsan_update_shadow_word.inc"
idx = 1;
if (stored) {
# include "tsan_update_shadow_word.inc"
} else {
# include "tsan_update_shadow_word.inc"
}
idx = 2;
if (stored) {
# include "tsan_update_shadow_word.inc"
} else {
# include "tsan_update_shadow_word.inc"
}
idx = 3;
if (stored) {
# include "tsan_update_shadow_word.inc"
} else {
# include "tsan_update_shadow_word.inc"
}
#endif
// we did not find any races and had already stored
// the current access info, so we are done
if (LIKELY(stored))
return;
// choose a random candidate slot and replace it
StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word);
return;
RACE:
HandleRace(thr, shadow_mem, cur, old);
return;
}
void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
AccessType typ) {
DCHECK(!(typ & kAccessAtomic));
const bool kAccessIsWrite = !(typ & kAccessRead);
const bool kIsAtomic = false;
while (size) {
int size1 = 1;
int kAccessSizeLog = kSizeLog1;
if (size >= 8 && (addr & ~7) == ((addr + 7) & ~7)) {
size1 = 8;
kAccessSizeLog = kSizeLog8;
} else if (size >= 4 && (addr & ~7) == ((addr + 3) & ~7)) {
size1 = 4;
kAccessSizeLog = kSizeLog4;
} else if (size >= 2 && (addr & ~7) == ((addr + 1) & ~7)) {
size1 = 2;
kAccessSizeLog = kSizeLog2;
}
MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic);
addr += size1;
size -= size1;
}
}
ALWAYS_INLINE
bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
Shadow cur(a);
for (uptr i = 0; i < kShadowCnt; i++) {
Shadow old(LoadShadow(&s[i]));
if (Shadow::Addr0AndSizeAreEqual(cur, old) &&
old.TidWithIgnore() == cur.TidWithIgnore() &&
old.epoch() > sync_epoch &&
old.IsAtomic() == cur.IsAtomic() &&
old.IsRead() <= cur.IsRead())
return true;
}
return false;
}
#if TSAN_VECTORIZE
# define SHUF(v0, v1, i0, i1, i2, i3) \
_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v0), \
_mm_castsi128_ps(v1), \
(i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
ALWAYS_INLINE
bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
// This is an optimized version of ContainsSameAccessSlow.
// load current access into access[0:63]
const m128 access = _mm_cvtsi64_si128(a);
// duplicate high part of access in addr0:
// addr0[0:31] = access[32:63]
// addr0[32:63] = access[32:63]
// addr0[64:95] = access[32:63]
// addr0[96:127] = access[32:63]
const m128 addr0 = SHUF(access, access, 1, 1, 1, 1);
// load 4 shadow slots
const m128 shadow0 = _mm_load_si128((__m128i*)s);
const m128 shadow1 = _mm_load_si128((__m128i*)s + 1);
// load high parts of 4 shadow slots into addr_vect:
// addr_vect[0:31] = shadow0[32:63]
// addr_vect[32:63] = shadow0[96:127]
// addr_vect[64:95] = shadow1[32:63]
// addr_vect[96:127] = shadow1[96:127]
m128 addr_vect = SHUF(shadow0, shadow1, 1, 3, 1, 3);
if (!is_write) {
// set IsRead bit in addr_vect
const m128 rw_mask1 = _mm_cvtsi64_si128(1<<15);
const m128 rw_mask = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
addr_vect = _mm_or_si128(addr_vect, rw_mask);
}
// addr0 == addr_vect?
const m128 addr_res = _mm_cmpeq_epi32(addr0, addr_vect);
// epoch1[0:63] = sync_epoch
const m128 epoch1 = _mm_cvtsi64_si128(sync_epoch);
// epoch[0:31] = sync_epoch[0:31]
// epoch[32:63] = sync_epoch[0:31]
// epoch[64:95] = sync_epoch[0:31]
// epoch[96:127] = sync_epoch[0:31]
const m128 epoch = SHUF(epoch1, epoch1, 0, 0, 0, 0);
// load low parts of shadow cell epochs into epoch_vect:
// epoch_vect[0:31] = shadow0[0:31]
// epoch_vect[32:63] = shadow0[64:95]
// epoch_vect[64:95] = shadow1[0:31]
// epoch_vect[96:127] = shadow1[64:95]
const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
// epoch_vect >= sync_epoch?
const m128 epoch_res = _mm_cmpgt_epi32(epoch_vect, epoch);
// addr_res & epoch_res
const m128 res = _mm_and_si128(addr_res, epoch_res);
// mask[0] = res[7]
// mask[1] = res[15]
// ...
// mask[15] = res[127]
const int mask = _mm_movemask_epi8(res);
return mask != 0;
}
#endif
ALWAYS_INLINE
bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
#if TSAN_VECTORIZE
bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
// NOTE: this check can fail if the shadow is concurrently mutated
// by other threads. But it still can be useful if you modify
// ContainsSameAccessFast and want to ensure that it's not completely broken.
// DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
return res;
#else
return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
#endif
}
ALWAYS_INLINE USED
void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic) {
RawShadow *shadow_mem = MemToShadow(addr);
DPrintf2("#%d: MemoryAccess: @%p %p size=%d"
" is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n",
(int)thr->fast_state.tid(), (void*)pc, (void*)addr,
(int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem,
(uptr)shadow_mem[0], (uptr)shadow_mem[1],
(uptr)shadow_mem[2], (uptr)shadow_mem[3]);
#if SANITIZER_DEBUG
if (!IsAppMem(addr)) {
Printf("Access to non app mem %zx\n", addr);
DCHECK(IsAppMem(addr));
}
if (!IsShadowMem(shadow_mem)) {
Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
DCHECK(IsShadowMem(shadow_mem));
}
#endif
if (!SANITIZER_GO && !kAccessIsWrite && *shadow_mem == kShadowRodata) {
// Access to .rodata section, no races here.
// Measurements show that it can be 10-20% of all memory accesses.
return;
}
FastState fast_state = thr->fast_state;
if (UNLIKELY(fast_state.GetIgnoreBit())) {
return;
}
Shadow cur(fast_state);
cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog);
cur.SetWrite(kAccessIsWrite);
cur.SetAtomic(kIsAtomic);
if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
thr->fast_synch_epoch, kAccessIsWrite))) {
return;
}
if (kCollectHistory) {
fast_state.IncrementEpoch();
thr->fast_state = fast_state;
TraceAddEvent(thr, fast_state, EventTypeMop, pc);
cur.IncrementEpoch();
}
MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
shadow_mem, cur);
}
// Called by MemoryAccessRange in tsan_rtl_thread.cpp
ALWAYS_INLINE USED
void MemoryAccessImpl(ThreadState *thr, uptr addr,
int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
u64 *shadow_mem, Shadow cur) {
if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
thr->fast_synch_epoch, kAccessIsWrite))) {
return;
}
MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
shadow_mem, cur);
}
static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size,
u64 val) {
(void)thr;
(void)pc;
if (size == 0)
return;
// FIXME: fix me.
uptr offset = addr % kShadowCell;
if (offset) {
offset = kShadowCell - offset;
if (size <= offset)
return;
addr += offset;
size -= offset;
}
DCHECK_EQ(addr % 8, 0);
// If a user passes some insane arguments (memset(0)),
// let it just crash as usual.
if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
return;
// Don't want to touch lots of shadow memory.
// If a program maps a 10MB stack, there is no need to reset the whole range.
size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1);
// UnmapOrDie/MmapFixedNoReserve does not work on Windows.
if (SANITIZER_WINDOWS || size < common_flags()->clear_shadow_mmap_threshold) {
RawShadow *p = MemToShadow(addr);
CHECK(IsShadowMem(p));
CHECK(IsShadowMem(p + size * kShadowCnt / kShadowCell - 1));
// FIXME: may overwrite a part outside the region
for (uptr i = 0; i < size / kShadowCell * kShadowCnt;) {
p[i++] = val;
for (uptr j = 1; j < kShadowCnt; j++)
p[i++] = 0;
}
} else {
// The region is big, reset only beginning and end.
const uptr kPageSize = GetPageSizeCached();
RawShadow *begin = MemToShadow(addr);
RawShadow *end = begin + size / kShadowCell * kShadowCnt;
RawShadow *p = begin;
// Set at least first kPageSize/2 to page boundary.
while ((p < begin + kPageSize / kShadowSize / 2) || ((uptr)p % kPageSize)) {
*p++ = val;
for (uptr j = 1; j < kShadowCnt; j++)
*p++ = 0;
}
// Reset middle part.
RawShadow *p1 = p;
p = RoundDown(end, kPageSize);
if (!MmapFixedSuperNoReserve((uptr)p1, (uptr)p - (uptr)p1))
Die();
// Set the ending.
while (p < end) {
*p++ = val;
for (uptr j = 1; j < kShadowCnt; j++)
*p++ = 0;
}
}
}
void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) {
MemoryRangeSet(thr, pc, addr, size, 0);
}
void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
// Processing more than 1k (4k of shadow) is expensive,
// can cause excessive memory consumption (the user does not necessarily touch
// the whole range) and is most likely unnecessary.
if (size > 1024)
size = 1024;
CHECK_EQ(thr->is_freeing, false);
thr->is_freeing = true;
MemoryAccessRange(thr, pc, addr, size, true);
thr->is_freeing = false;
if (kCollectHistory) {
thr->fast_state.IncrementEpoch();
TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
}
Shadow s(thr->fast_state);
s.ClearIgnoreBit();
s.MarkAsFreed();
s.SetWrite(true);
s.SetAddr0AndSizeLog(0, 3);
MemoryRangeSet(thr, pc, addr, size, s.raw());
}
void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) {
if (kCollectHistory) {
thr->fast_state.IncrementEpoch();
TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
}
Shadow s(thr->fast_state);
s.ClearIgnoreBit();
s.SetWrite(true);
s.SetAddr0AndSizeLog(0, 3);
MemoryRangeSet(thr, pc, addr, size, s.raw());
}
void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
uptr size) {
if (thr->ignore_reads_and_writes == 0)
MemoryRangeImitateWrite(thr, pc, addr, size);
else
MemoryResetRange(thr, pc, addr, size);
}
ALWAYS_INLINE USED
void FuncEntry(ThreadState *thr, uptr pc) {
DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void*)pc);
if (kCollectHistory) {
thr->fast_state.IncrementEpoch();
TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc);
}
// Shadow stack maintenance can be replaced with
// stack unwinding during trace switch (which presumably must be faster).
DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack);
#if !SANITIZER_GO
DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#else
if (thr->shadow_stack_pos == thr->shadow_stack_end)
GrowShadowStack(thr);
#endif
thr->shadow_stack_pos[0] = pc;
thr->shadow_stack_pos++;
}
ALWAYS_INLINE USED
void FuncExit(ThreadState *thr) {
DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid());
if (kCollectHistory) {
thr->fast_state.IncrementEpoch();
TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0);
}
DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack);
#if !SANITIZER_GO
DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#endif
thr->shadow_stack_pos--;
}
void ThreadIgnoreBegin(ThreadState *thr, uptr pc) {
DPrintf("#%d: ThreadIgnoreBegin\n", thr->tid);
thr->ignore_reads_and_writes++;
@ -1293,8 +755,3 @@ MutexMeta mutex_meta[] = {
void PrintMutexPC(uptr pc) { StackTrace(&pc, 1).Print(); }
} // namespace __sanitizer
#endif
#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
# include "tsan_interface.inc"
#endif

View file

@ -749,6 +749,44 @@ void TraceTime(ThreadState *thr);
} // namespace v3
void GrowShadowStack(ThreadState *thr);
ALWAYS_INLINE
void FuncEntry(ThreadState *thr, uptr pc) {
DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void *)pc);
if (kCollectHistory) {
thr->fast_state.IncrementEpoch();
TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc);
}
// Shadow stack maintenance can be replaced with
// stack unwinding during trace switch (which presumably must be faster).
DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack);
#if !SANITIZER_GO
DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#else
if (thr->shadow_stack_pos == thr->shadow_stack_end)
GrowShadowStack(thr);
#endif
thr->shadow_stack_pos[0] = pc;
thr->shadow_stack_pos++;
}
ALWAYS_INLINE
void FuncExit(ThreadState *thr) {
DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid());
if (kCollectHistory) {
thr->fast_state.IncrementEpoch();
TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0);
}
DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack);
#if !SANITIZER_GO
DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#endif
thr->shadow_stack_pos--;
}
#if !SANITIZER_GO
extern void (*on_initialize)(void);
extern int (*on_finalize)(int);

View file

@ -0,0 +1,604 @@
//===-- tsan_rtl_access.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Definitions of memory access and function entry/exit entry points.
//===----------------------------------------------------------------------===//
#include "tsan_rtl.h"
namespace __tsan {
namespace v3 {
ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState *thr, uptr pc,
uptr addr, uptr size,
AccessType typ) {
DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
if (!kCollectHistory)
return true;
EventAccess *ev;
if (UNLIKELY(!TraceAcquire(thr, &ev)))
return false;
u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
thr->trace_prev_pc = pc;
if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
ev->is_access = 1;
ev->is_read = !!(typ & kAccessRead);
ev->is_atomic = !!(typ & kAccessAtomic);
ev->size_log = size_log;
ev->pc_delta = pc_delta;
DCHECK_EQ(ev->pc_delta, pc_delta);
ev->addr = CompressAddr(addr);
TraceRelease(thr, ev);
return true;
}
auto *evex = reinterpret_cast<EventAccessExt *>(ev);
evex->is_access = 0;
evex->is_func = 0;
evex->type = EventType::kAccessExt;
evex->is_read = !!(typ & kAccessRead);
evex->is_atomic = !!(typ & kAccessAtomic);
evex->size_log = size_log;
evex->addr = CompressAddr(addr);
evex->pc = pc;
TraceRelease(thr, evex);
return true;
}
ALWAYS_INLINE USED bool TryTraceMemoryAccessRange(ThreadState *thr, uptr pc,
uptr addr, uptr size,
AccessType typ) {
if (!kCollectHistory)
return true;
EventAccessRange *ev;
if (UNLIKELY(!TraceAcquire(thr, &ev)))
return false;
thr->trace_prev_pc = pc;
ev->is_access = 0;
ev->is_func = 0;
ev->type = EventType::kAccessRange;
ev->is_read = !!(typ & kAccessRead);
ev->is_free = !!(typ & kAccessFree);
ev->size_lo = size;
ev->pc = CompressAddr(pc);
ev->addr = CompressAddr(addr);
ev->size_hi = size >> EventAccessRange::kSizeLoBits;
TraceRelease(thr, ev);
return true;
}
void TraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
AccessType typ) {
if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
return;
TraceSwitchPart(thr);
UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
DCHECK(res);
}
void TraceFunc(ThreadState *thr, uptr pc) {
if (LIKELY(TryTraceFunc(thr, pc)))
return;
TraceSwitchPart(thr);
UNUSED bool res = TryTraceFunc(thr, pc);
DCHECK(res);
}
void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr,
StackID stk) {
DCHECK(type == EventType::kLock || type == EventType::kRLock);
if (!kCollectHistory)
return;
EventLock ev;
ev.is_access = 0;
ev.is_func = 0;
ev.type = type;
ev.pc = CompressAddr(pc);
ev.stack_lo = stk;
ev.stack_hi = stk >> EventLock::kStackIDLoBits;
ev._ = 0;
ev.addr = CompressAddr(addr);
TraceEvent(thr, ev);
}
void TraceMutexUnlock(ThreadState *thr, uptr addr) {
if (!kCollectHistory)
return;
EventUnlock ev;
ev.is_access = 0;
ev.is_func = 0;
ev.type = EventType::kUnlock;
ev._ = 0;
ev.addr = CompressAddr(addr);
TraceEvent(thr, ev);
}
void TraceTime(ThreadState *thr) {
if (!kCollectHistory)
return;
EventTime ev;
ev.is_access = 0;
ev.is_func = 0;
ev.type = EventType::kTime;
ev.sid = static_cast<u64>(thr->sid);
ev.epoch = static_cast<u64>(thr->epoch);
ev._ = 0;
TraceEvent(thr, ev);
}
} // namespace v3
ALWAYS_INLINE
Shadow LoadShadow(u64 *p) {
u64 raw = atomic_load((atomic_uint64_t *)p, memory_order_relaxed);
return Shadow(raw);
}
ALWAYS_INLINE
void StoreShadow(u64 *sp, u64 s) {
atomic_store((atomic_uint64_t *)sp, s, memory_order_relaxed);
}
ALWAYS_INLINE
void StoreIfNotYetStored(u64 *sp, u64 *s) {
StoreShadow(sp, *s);
*s = 0;
}
extern "C" void __tsan_report_race();
ALWAYS_INLINE
void HandleRace(ThreadState *thr, u64 *shadow_mem, Shadow cur, Shadow old) {
thr->racy_state[0] = cur.raw();
thr->racy_state[1] = old.raw();
thr->racy_shadow_addr = shadow_mem;
#if !SANITIZER_GO
HACKY_CALL(__tsan_report_race);
#else
ReportRace(thr);
#endif
}
static inline bool HappensBefore(Shadow old, ThreadState *thr) {
return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
}
ALWAYS_INLINE
void MemoryAccessImpl1(ThreadState *thr, uptr addr, int kAccessSizeLog,
bool kAccessIsWrite, bool kIsAtomic, u64 *shadow_mem,
Shadow cur) {
// This potentially can live in an MMX/SSE scratch register.
// The required intrinsics are:
// __m128i _mm_move_epi64(__m128i*);
// _mm_storel_epi64(u64*, __m128i);
u64 store_word = cur.raw();
bool stored = false;
// scan all the shadow values and dispatch to 4 categories:
// same, replace, candidate and race (see comments below).
// we consider only 3 cases regarding access sizes:
// equal, intersect and not intersect. initially I considered
// larger and smaller as well; it allowed replacing some
// 'candidates' with 'same' or 'replace', but I think
// it's just not worth it (performance- and complexity-wise).
Shadow old(0);
// In release mode we manually unroll the loop,
// because empirically gcc generates better code this way.
// However, we can't afford unrolling in debug mode, because the function
// consumes almost 4K of stack. Gtest gives only 4K of stack to death test
// threads, which is not enough for the unrolled loop.
#if SANITIZER_DEBUG
for (int idx = 0; idx < 4; idx++) {
# include "tsan_update_shadow_word.inc"
}
#else
int idx = 0;
# include "tsan_update_shadow_word.inc"
idx = 1;
if (stored) {
# include "tsan_update_shadow_word.inc"
} else {
# include "tsan_update_shadow_word.inc"
}
idx = 2;
if (stored) {
# include "tsan_update_shadow_word.inc"
} else {
# include "tsan_update_shadow_word.inc"
}
idx = 3;
if (stored) {
# include "tsan_update_shadow_word.inc"
} else {
# include "tsan_update_shadow_word.inc"
}
#endif
// we did not find any races and had already stored
// the current access info, so we are done
if (LIKELY(stored))
return;
// choose a random candidate slot and replace it
StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word);
return;
RACE:
HandleRace(thr, shadow_mem, cur, old);
return;
}
void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
AccessType typ) {
DCHECK(!(typ & kAccessAtomic));
const bool kAccessIsWrite = !(typ & kAccessRead);
const bool kIsAtomic = false;
while (size) {
int size1 = 1;
int kAccessSizeLog = kSizeLog1;
if (size >= 8 && (addr & ~7) == ((addr + 7) & ~7)) {
size1 = 8;
kAccessSizeLog = kSizeLog8;
} else if (size >= 4 && (addr & ~7) == ((addr + 3) & ~7)) {
size1 = 4;
kAccessSizeLog = kSizeLog4;
} else if (size >= 2 && (addr & ~7) == ((addr + 1) & ~7)) {
size1 = 2;
kAccessSizeLog = kSizeLog2;
}
MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic);
addr += size1;
size -= size1;
}
}
ALWAYS_INLINE
bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
Shadow cur(a);
for (uptr i = 0; i < kShadowCnt; i++) {
Shadow old(LoadShadow(&s[i]));
if (Shadow::Addr0AndSizeAreEqual(cur, old) &&
old.TidWithIgnore() == cur.TidWithIgnore() &&
old.epoch() > sync_epoch && old.IsAtomic() == cur.IsAtomic() &&
old.IsRead() <= cur.IsRead())
return true;
}
return false;
}
#if TSAN_VECTORIZE
# define SHUF(v0, v1, i0, i1, i2, i3) \
_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v0), \
_mm_castsi128_ps(v1), \
(i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
ALWAYS_INLINE
bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
// This is an optimized version of ContainsSameAccessSlow.
// load current access into access[0:63]
const m128 access = _mm_cvtsi64_si128(a);
// duplicate high part of access in addr0:
// addr0[0:31] = access[32:63]
// addr0[32:63] = access[32:63]
// addr0[64:95] = access[32:63]
// addr0[96:127] = access[32:63]
const m128 addr0 = SHUF(access, access, 1, 1, 1, 1);
// load 4 shadow slots
const m128 shadow0 = _mm_load_si128((__m128i *)s);
const m128 shadow1 = _mm_load_si128((__m128i *)s + 1);
// load high parts of 4 shadow slots into addr_vect:
// addr_vect[0:31] = shadow0[32:63]
// addr_vect[32:63] = shadow0[96:127]
// addr_vect[64:95] = shadow1[32:63]
// addr_vect[96:127] = shadow1[96:127]
m128 addr_vect = SHUF(shadow0, shadow1, 1, 3, 1, 3);
if (!is_write) {
// set IsRead bit in addr_vect
const m128 rw_mask1 = _mm_cvtsi64_si128(1 << 15);
const m128 rw_mask = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
addr_vect = _mm_or_si128(addr_vect, rw_mask);
}
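// OR-ing the IsRead bit into the shadow entries makes a current read match
// both previous reads and previous writes, mirroring old.IsRead() <=
// cur.IsRead() in the slow path; a current write still matches only writes.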
// addr0 == addr_vect?
const m128 addr_res = _mm_cmpeq_epi32(addr0, addr_vect);
// epoch1[0:63] = sync_epoch
const m128 epoch1 = _mm_cvtsi64_si128(sync_epoch);
// epoch[0:31] = sync_epoch[0:31]
// epoch[32:63] = sync_epoch[0:31]
// epoch[64:95] = sync_epoch[0:31]
// epoch[96:127] = sync_epoch[0:31]
const m128 epoch = SHUF(epoch1, epoch1, 0, 0, 0, 0);
// load low parts of shadow cell epochs into epoch_vect:
// epoch_vect[0:31] = shadow0[0:31]
// epoch_vect[32:63] = shadow0[64:95]
// epoch_vect[64:95] = shadow1[0:31]
// epoch_vect[96:127] = shadow1[64:95]
const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
// epoch_vect > sync_epoch?
const m128 epoch_res = _mm_cmpgt_epi32(epoch_vect, epoch);
// addr_res & epoch_res
const m128 res = _mm_and_si128(addr_res, epoch_res);
// mask[0] = res[7]
// mask[1] = res[15]
// ...
// mask[15] = res[127]
const int mask = _mm_movemask_epi8(res);
return mask != 0;
}
#endif
ALWAYS_INLINE
bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
#if TSAN_VECTORIZE
bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
// NOTE: this check can fail if the shadow is concurrently mutated
// by other threads. But it can still be useful if you modify
// ContainsSameAccessFast and want to ensure that it's not completely broken.
// DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
return res;
#else
return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
#endif
}
ALWAYS_INLINE USED void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
int kAccessSizeLog, bool kAccessIsWrite,
bool kIsAtomic) {
RawShadow *shadow_mem = MemToShadow(addr);
DPrintf2(
"#%d: MemoryAccess: @%p %p size=%d"
" is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n",
(int)thr->fast_state.tid(), (void *)pc, (void *)addr,
(int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem,
(uptr)shadow_mem[0], (uptr)shadow_mem[1], (uptr)shadow_mem[2],
(uptr)shadow_mem[3]);
#if SANITIZER_DEBUG
if (!IsAppMem(addr)) {
Printf("Access to non app mem %zx\n", addr);
DCHECK(IsAppMem(addr));
}
if (!IsShadowMem(shadow_mem)) {
Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
DCHECK(IsShadowMem(shadow_mem));
}
#endif
if (!SANITIZER_GO && !kAccessIsWrite && *shadow_mem == kShadowRodata) {
// Access to .rodata section, no races here.
// Measurements show that it can be 10-20% of all memory accesses.
return;
}
FastState fast_state = thr->fast_state;
if (UNLIKELY(fast_state.GetIgnoreBit())) {
return;
}
Shadow cur(fast_state);
cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog);
cur.SetWrite(kAccessIsWrite);
cur.SetAtomic(kIsAtomic);
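// If one of the four shadow slots already records this exact access by this
// thread (and it is newer than the last synchronization epoch), nothing new
// can be learned, so skip the slow path.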
if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch,
kAccessIsWrite))) {
return;
}
if (kCollectHistory) {
fast_state.IncrementEpoch();
thr->fast_state = fast_state;
TraceAddEvent(thr, fast_state, EventTypeMop, pc);
cur.IncrementEpoch();
}
MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
shadow_mem, cur);
}
// Called by MemoryAccessRange (defined below in this file).
ALWAYS_INLINE USED void MemoryAccessImpl(ThreadState *thr, uptr addr,
int kAccessSizeLog,
bool kAccessIsWrite, bool kIsAtomic,
u64 *shadow_mem, Shadow cur) {
if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch,
kAccessIsWrite))) {
return;
}
MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
shadow_mem, cur);
}
static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size,
u64 val) {
(void)thr;
(void)pc;
if (size == 0)
return;
// FIXME: fix me.
uptr offset = addr % kShadowCell;
if (offset) {
offset = kShadowCell - offset;
if (size <= offset)
return;
addr += offset;
size -= offset;
}
DCHECK_EQ(addr % 8, 0);
// If a user passes some insane arguments (memset(0)),
// let it just crash as usual.
if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
return;
// Don't want to touch lots of shadow memory.
// If a program maps 10MB stack, there is no need to reset the whole range.
size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1);
// UnmapOrDie/MmapFixedNoReserve does not work on Windows.
if (SANITIZER_WINDOWS || size < common_flags()->clear_shadow_mmap_threshold) {
RawShadow *p = MemToShadow(addr);
CHECK(IsShadowMem(p));
CHECK(IsShadowMem(p + size * kShadowCnt / kShadowCell - 1));
// FIXME: may overwrite a part outside the region
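// Each kShadowCell of application memory is backed by kShadowCnt shadow
// slots; val goes into the first slot of each cell and the rest are cleared.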
for (uptr i = 0; i < size / kShadowCell * kShadowCnt;) {
p[i++] = val;
for (uptr j = 1; j < kShadowCnt; j++) p[i++] = 0;
}
} else {
// The region is big, reset only beginning and end.
const uptr kPageSize = GetPageSizeCached();
RawShadow *begin = MemToShadow(addr);
RawShadow *end = begin + size / kShadowCell * kShadowCnt;
RawShadow *p = begin;
// Set at least the first kPageSize/2 bytes of shadow, continuing up to a page boundary.
while ((p < begin + kPageSize / kShadowSize / 2) || ((uptr)p % kPageSize)) {
*p++ = val;
for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0;
}
// Reset middle part.
RawShadow *p1 = p;
p = RoundDown(end, kPageSize);
if (!MmapFixedSuperNoReserve((uptr)p1, (uptr)p - (uptr)p1))
Die();
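// The freshly mapped pages are zero-filled, so the middle of the range reads
// back as zero shadow without touching every cell.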
// Set the ending.
while (p < end) {
*p++ = val;
for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0;
}
}
}
void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) {
MemoryRangeSet(thr, pc, addr, size, 0);
}
void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
// Processing more than 1k (4k of shadow) is expensive,
// can cause excessive memory consumption (the user does not necessarily touch
// the whole range) and is most likely unnecessary.
if (size > 1024)
size = 1024;
CHECK_EQ(thr->is_freeing, false);
thr->is_freeing = true;
MemoryAccessRange(thr, pc, addr, size, true);
thr->is_freeing = false;
if (kCollectHistory) {
thr->fast_state.IncrementEpoch();
TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
}
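// Write a synthetic 8-byte write access marked as freed into the shadow of
// the whole range, so that any later access to this memory conflicts with
// the free.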
Shadow s(thr->fast_state);
s.ClearIgnoreBit();
s.MarkAsFreed();
s.SetWrite(true);
s.SetAddr0AndSizeLog(0, 3);
MemoryRangeSet(thr, pc, addr, size, s.raw());
}
void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) {
if (kCollectHistory) {
thr->fast_state.IncrementEpoch();
TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
}
Shadow s(thr->fast_state);
s.ClearIgnoreBit();
s.SetWrite(true);
s.SetAddr0AndSizeLog(0, 3);
MemoryRangeSet(thr, pc, addr, size, s.raw());
}
void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
uptr size) {
if (thr->ignore_reads_and_writes == 0)
MemoryRangeImitateWrite(thr, pc, addr, size);
else
MemoryResetRange(thr, pc, addr, size);
}
void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
bool is_write) {
if (size == 0)
return;
RawShadow *shadow_mem = MemToShadow(addr);
DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_write=%d\n", thr->tid,
(void *)pc, (void *)addr, (int)size, is_write);
#if SANITIZER_DEBUG
if (!IsAppMem(addr)) {
Printf("Access to non app mem %zx\n", addr);
DCHECK(IsAppMem(addr));
}
if (!IsAppMem(addr + size - 1)) {
Printf("Access to non app mem %zx\n", addr + size - 1);
DCHECK(IsAppMem(addr + size - 1));
}
if (!IsShadowMem(shadow_mem)) {
Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
DCHECK(IsShadowMem(shadow_mem));
}
if (!IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1)) {
Printf("Bad shadow addr %p (%zx)\n", shadow_mem + size * kShadowCnt / 8 - 1,
addr + size - 1);
DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1));
}
#endif
if (*shadow_mem == kShadowRodata) {
DCHECK(!is_write);
// Access to .rodata section, no races here.
// Measurements show that it can be 10-20% of all memory accesses.
return;
}
FastState fast_state = thr->fast_state;
if (fast_state.GetIgnoreBit())
return;
fast_state.IncrementEpoch();
thr->fast_state = fast_state;
TraceAddEvent(thr, fast_state, EventTypeMop, pc);
bool unaligned = (addr % kShadowCell) != 0;
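// The range is processed as a byte-wise prefix up to the first kShadowCell
// boundary, then whole 8-byte cells, then a byte-wise suffix; shadow_mem
// advances by kShadowCnt slots per cell.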
// Handle unaligned beginning, if any.
for (; addr % kShadowCell && size; addr++, size--) {
int const kAccessSizeLog = 0;
Shadow cur(fast_state);
cur.SetWrite(is_write);
cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem,
cur);
}
if (unaligned)
shadow_mem += kShadowCnt;
// Handle middle part, if any.
for (; size >= kShadowCell; addr += kShadowCell, size -= kShadowCell) {
int const kAccessSizeLog = 3;
Shadow cur(fast_state);
cur.SetWrite(is_write);
cur.SetAddr0AndSizeLog(0, kAccessSizeLog);
MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem,
cur);
shadow_mem += kShadowCnt;
}
// Handle ending, if any.
for (; size; addr++, size--) {
int const kAccessSizeLog = 0;
Shadow cur(fast_state);
cur.SetWrite(is_write);
cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem,
cur);
}
}
} // namespace __tsan
#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
# include "tsan_interface.inc"
#endif

View file

@@ -1,6 +1,5 @@
#include "tsan_ppc_regs.h"
.machine altivec
.section .text
.hidden __tsan_setjmp
.globl _setjmp

View file

@@ -560,9 +560,7 @@ bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
if (tctx->thr)
last_pos = (Event *)atomic_load_relaxed(&tctx->thr->trace_pos);
}
// Too large for stack.
alignas(MutexSet) static char mset_storage[sizeof(MutexSet)];
MutexSet &mset = *new (mset_storage) MutexSet();
DynamicMutexSet mset;
Vector<uptr> stack;
uptr prev_pc = 0;
bool found = false;
@@ -588,7 +586,7 @@ bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
if (match && type == EventType::kAccessExt &&
IsWithinAccess(addr, size, ev_addr, ev_size) &&
is_read == ev->is_read && is_atomic == ev->is_atomic && !is_free)
RestoreStackMatch(pstk, pmset, &stack, &mset, ev_pc, &found);
RestoreStackMatch(pstk, pmset, &stack, mset, ev_pc, &found);
return;
}
if (evp->is_func) {
@@ -615,7 +613,7 @@ bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
IsWithinAccess(addr, size, ev_addr, ev_size) &&
is_read == ev->is_read && is_atomic == ev->is_atomic &&
!is_free)
RestoreStackMatch(pstk, pmset, &stack, &mset, ev->pc, &found);
RestoreStackMatch(pstk, pmset, &stack, mset, ev->pc, &found);
break;
}
case EventType::kAccessRange: {
@@ -630,7 +628,7 @@ bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
if (match && type == EventType::kAccessExt &&
IsWithinAccess(addr, size, ev_addr, ev_size) &&
is_read == ev->is_read && !is_atomic && is_free == ev->is_free)
RestoreStackMatch(pstk, pmset, &stack, &mset, ev_pc, &found);
RestoreStackMatch(pstk, pmset, &stack, mset, ev_pc, &found);
break;
}
case EventType::kLock:
@@ -644,18 +642,18 @@ bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
(ev->stack_hi << EventLock::kStackIDLoBits) + ev->stack_lo;
DPrintf2(" Lock: pc=0x%zx addr=0x%zx stack=%u write=%d\n", ev_pc,
ev_addr, stack_id, is_write);
mset.AddAddr(ev_addr, stack_id, is_write);
mset->AddAddr(ev_addr, stack_id, is_write);
// Events with ev_pc == 0 are written to the beginning of trace
// part as initial mutex set (are not real).
if (match && type == EventType::kLock && addr == ev_addr && ev_pc)
RestoreStackMatch(pstk, pmset, &stack, &mset, ev_pc, &found);
RestoreStackMatch(pstk, pmset, &stack, mset, ev_pc, &found);
break;
}
case EventType::kUnlock: {
auto *ev = reinterpret_cast<EventUnlock *>(evp);
uptr ev_addr = RestoreAddr(ev->addr);
DPrintf2(" Unlock: addr=0x%zx\n", ev_addr);
mset.DelAddr(ev_addr);
mset->DelAddr(ev_addr);
break;
}
case EventType::kTime:
@@ -897,11 +895,7 @@ void ReportRace(ThreadState *thr) {
if (IsFiredSuppression(ctx, typ, traces[0]))
return;
// MutexSet is too large to live on stack.
Vector<u64> mset_buffer;
mset_buffer.Resize(sizeof(MutexSet) / sizeof(u64) + 1);
MutexSet *mset2 = new(&mset_buffer[0]) MutexSet();
DynamicMutexSet mset2;
Shadow s2(thr->racy_state[1]);
RestoreStack(s2.tid(), s2.epoch(), &traces[1], mset2, &tags[1]);
if (IsFiredSuppression(ctx, typ, traces[1]))

View file

@@ -323,85 +323,6 @@ void ThreadSetName(ThreadState *thr, const char *name) {
ctx->thread_registry.SetThreadName(thr->tid, name);
}
void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr,
uptr size, bool is_write) {
if (size == 0)
return;
RawShadow *shadow_mem = MemToShadow(addr);
DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_write=%d\n",
thr->tid, (void*)pc, (void*)addr,
(int)size, is_write);
#if SANITIZER_DEBUG
if (!IsAppMem(addr)) {
Printf("Access to non app mem %zx\n", addr);
DCHECK(IsAppMem(addr));
}
if (!IsAppMem(addr + size - 1)) {
Printf("Access to non app mem %zx\n", addr + size - 1);
DCHECK(IsAppMem(addr + size - 1));
}
if (!IsShadowMem(shadow_mem)) {
Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
DCHECK(IsShadowMem(shadow_mem));
}
if (!IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1)) {
Printf("Bad shadow addr %p (%zx)\n",
shadow_mem + size * kShadowCnt / 8 - 1, addr + size - 1);
DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1));
}
#endif
if (*shadow_mem == kShadowRodata) {
DCHECK(!is_write);
// Access to .rodata section, no races here.
// Measurements show that it can be 10-20% of all memory accesses.
return;
}
FastState fast_state = thr->fast_state;
if (fast_state.GetIgnoreBit())
return;
fast_state.IncrementEpoch();
thr->fast_state = fast_state;
TraceAddEvent(thr, fast_state, EventTypeMop, pc);
bool unaligned = (addr % kShadowCell) != 0;
// Handle unaligned beginning, if any.
for (; addr % kShadowCell && size; addr++, size--) {
int const kAccessSizeLog = 0;
Shadow cur(fast_state);
cur.SetWrite(is_write);
cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false,
shadow_mem, cur);
}
if (unaligned)
shadow_mem += kShadowCnt;
// Handle middle part, if any.
for (; size >= kShadowCell; addr += kShadowCell, size -= kShadowCell) {
int const kAccessSizeLog = 3;
Shadow cur(fast_state);
cur.SetWrite(is_write);
cur.SetAddr0AndSizeLog(0, kAccessSizeLog);
MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false,
shadow_mem, cur);
shadow_mem += kShadowCnt;
}
// Handle ending, if any.
for (; size; addr++, size--) {
int const kAccessSizeLog = 0;
Shadow cur(fast_state);
cur.SetWrite(is_write);
cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false,
shadow_mem, cur);
}
}
#if !SANITIZER_GO
void FiberSwitchImpl(ThreadState *from, ThreadState *to) {
Processor *proc = from->proc();

View file

@@ -50,7 +50,6 @@ void InitializeFlags() {
{
CommonFlags cf;
cf.CopyFrom(*common_flags());
cf.print_summary = false;
cf.external_symbolizer_path = GetFlag("UBSAN_SYMBOLIZER_PATH");
OverrideCommonFlags(cf);
}

View file

@@ -894,21 +894,6 @@ void __ubsan_handle_cfi_bad_type(CFICheckFailData *Data, ValueHandle Vtable,
} // namespace __ubsan
void __ubsan::__ubsan_handle_cfi_bad_icall(CFIBadIcallData *CallData,
ValueHandle Function) {
GET_REPORT_OPTIONS(false);
CFICheckFailData Data = {CFITCK_ICall, CallData->Loc, CallData->Type};
handleCFIBadIcall(&Data, Function, Opts);
}
void __ubsan::__ubsan_handle_cfi_bad_icall_abort(CFIBadIcallData *CallData,
ValueHandle Function) {
GET_REPORT_OPTIONS(true);
CFICheckFailData Data = {CFITCK_ICall, CallData->Loc, CallData->Type};
handleCFIBadIcall(&Data, Function, Opts);
Die();
}
void __ubsan::__ubsan_handle_cfi_check_fail(CFICheckFailData *Data,
ValueHandle Value,
uptr ValidVtable) {

View file

@@ -215,20 +215,12 @@ enum CFITypeCheckKind : unsigned char {
CFITCK_VMFCall,
};
struct CFIBadIcallData {
SourceLocation Loc;
const TypeDescriptor &Type;
};
struct CFICheckFailData {
CFITypeCheckKind CheckKind;
SourceLocation Loc;
const TypeDescriptor &Type;
};
/// \brief Handle control flow integrity failure for indirect function calls.
RECOVERABLE(cfi_bad_icall, CFIBadIcallData *Data, ValueHandle Function)
/// \brief Handle control flow integrity failures.
RECOVERABLE(cfi_check_fail, CFICheckFailData *Data, ValueHandle Function,
uptr VtableIsValid)

View file

@@ -12,7 +12,6 @@
#ifndef UBSAN_PLATFORM_H
#define UBSAN_PLATFORM_H
#ifndef CAN_SANITIZE_UB
// Other platforms should be easy to add, and probably work as-is.
#if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) || \
defined(__NetBSD__) || defined(__DragonFly__) || \
@@ -22,6 +21,5 @@
#else
# define CAN_SANITIZE_UB 0
#endif
#endif //CAN_SANITIZE_UB
#endif