[tbb-commits] [tor-browser] 44/73: Bug 1777604 - wasm: Move membarrier call to separate functions. r=nbp, a=RyanVM

gitolite role git at cupani.torproject.org
Wed Sep 21 20:17:37 UTC 2022


This is an automated email from the git hooks/post-receive script.

richard pushed a commit to branch geckoview-102.3.0esr-12.0-1
in repository tor-browser.

commit 257982b707fbe6676ab7b9f0b7771f90a7f60101
Author: Ryan Hunt <rhunt at eqrion.net>
AuthorDate: Thu Jul 28 13:27:02 2022 +0000

    Bug 1777604 - wasm: Move membarrier call to separate functions. r=nbp, a=RyanVM
    
    Differential Revision: https://phabricator.services.mozilla.com/D152305
---
 js/src/jit/AutoWritableJitCode.h                   |   3 +-
 js/src/jit/ExecutableAllocator.h                   |  15 +--
 js/src/jit/FlushICache.cpp                         | 132 +++++++++++++++++++++
 js/src/jit/FlushICache.h                           |  33 ++++--
 js/src/jit/ProcessExecutableMemory.cpp             |   6 +-
 js/src/jit/ProcessExecutableMemory.h               |  10 +-
 js/src/jit/arm/Architecture-arm.cpp                |   2 +-
 js/src/jit/arm64/Architecture-arm64.cpp            |   8 +-
 js/src/jit/arm64/vixl/Cpu-vixl.h                   |   6 +-
 js/src/jit/arm64/vixl/MozCpu-vixl.cpp              | 112 ++---------------
 js/src/jit/loong64/Architecture-loong64.cpp        |   2 +-
 .../jit/mips-shared/Architecture-mips-shared.cpp   |   2 +-
 js/src/jit/moz.build                               |   1 +
 js/src/jsapi-tests/testsJit.cpp                    |   4 +-
 js/src/wasm/WasmBuiltins.cpp                       |   5 +-
 js/src/wasm/WasmCode.cpp                           |  42 ++-----
 js/src/wasm/WasmCode.h                             |  10 +-
 js/src/wasm/WasmCompile.cpp                        |  25 ++--
 js/src/wasm/WasmModule.cpp                         |  11 +-
 19 files changed, 221 insertions(+), 208 deletions(-)

diff --git a/js/src/jit/AutoWritableJitCode.h b/js/src/jit/AutoWritableJitCode.h
index 67fa84c2dbd61..ab5b35a54f763 100644
--- a/js/src/jit/AutoWritableJitCode.h
+++ b/js/src/jit/AutoWritableJitCode.h
@@ -59,8 +59,7 @@ class MOZ_RAII AutoWritableJitCodeFallible {
       }
     });
 
-    if (!ExecutableAllocator::makeExecutableAndFlushICache(
-            FlushICacheSpec::LocalThreadOnly, addr_, size_)) {
+    if (!ExecutableAllocator::makeExecutableAndFlushICache(addr_, size_)) {
       MOZ_CRASH();
     }
     rt_->toggleAutoWritableJitCodeActive(false);
diff --git a/js/src/jit/ExecutableAllocator.h b/js/src/jit/ExecutableAllocator.h
index 266c7af4b8548..85c01562c373a 100644
--- a/js/src/jit/ExecutableAllocator.h
+++ b/js/src/jit/ExecutableAllocator.h
@@ -172,19 +172,10 @@ class ExecutableAllocator {
                            MustFlushICache::No);
   }
 
-  [[nodiscard]] static bool makeExecutableAndFlushICache(
-      FlushICacheSpec flushSpec, void* start, size_t size) {
-    MustFlushICache mustFlushICache;
-    switch (flushSpec) {
-      case FlushICacheSpec::LocalThreadOnly:
-        mustFlushICache = MustFlushICache::LocalThreadOnly;
-        break;
-      case FlushICacheSpec::AllThreads:
-        mustFlushICache = MustFlushICache::AllThreads;
-        break;
-    }
+  [[nodiscard]] static bool makeExecutableAndFlushICache(void* start,
+                                                         size_t size) {
     return ReprotectRegion(start, size, ProtectionSetting::Executable,
-                           mustFlushICache);
+                           MustFlushICache::Yes);
   }
 
   static void poisonCode(JSRuntime* rt, JitPoisonRangeVector& ranges);
diff --git a/js/src/jit/FlushICache.cpp b/js/src/jit/FlushICache.cpp
new file mode 100644
index 0000000000000..1e2ec69272fe8
--- /dev/null
+++ b/js/src/jit/FlushICache.cpp
@@ -0,0 +1,132 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/FlushICache.h"
+
+#ifdef JS_CODEGEN_ARM64
+#  include "jit/arm64/vixl/MozCachingDecoder.h"
+#  include "jit/arm64/vixl/Simulator-vixl.h"
+#endif
+
+#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
+
+#  ifdef __linux__
+#    include <linux/version.h>
+#    define LINUX_HAS_MEMBARRIER (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0))
+#  else
+#    define LINUX_HAS_MEMBARRIER 0
+#  endif
+
+#  if LINUX_HAS_MEMBARRIER || defined(__android__)
+#    include <string.h>
+
+#    if LINUX_HAS_MEMBARRIER
+#      include <linux/membarrier.h>
+#      include <sys/syscall.h>
+#      include <sys/utsname.h>
+#      include <unistd.h>
+#    elif defined(__android__)
+#      include <sys/syscall.h>
+#      include <unistd.h>
+#    else
+#      error "Missing platform-specific declarations for membarrier syscall!"
+#    endif  // __linux__ / ANDROID
+
+static int membarrier(int cmd, int flags) {
+  return syscall(__NR_membarrier, cmd, flags);
+}
+
+// These definitions come from the Linux kernel source, for kernels before 4.16
+// which didn't have access to these membarrier commands.
+#    ifndef MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
+#      define MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE (1 << 5)
+#    endif
+
+#    ifndef MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE
+#      define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE (1 << 6)
+#    endif
+#  endif  // LINUX_HAS_MEMBARRIER || defined(__android__)
+
+using namespace js;
+using namespace js::jit;
+
+namespace js {
+namespace jit {
+
+bool CanFlushExecutionContextForAllThreads() {
+#  if (LINUX_HAS_MEMBARRIER || defined(__android__))
+  // On Linux, check that the kernel supports membarrier(2), that is, that it
+  // is Linux 4.16 or newer.
+  //
+  // Note: this code has been extracted (August 2020) from
+  // https://android.googlesource.com/platform/art/+/58520dfba31d6eeef75f5babff15e09aa28e5db8/libartbase/base/membarrier.cc#50
+  static constexpr int kRequiredMajor = 4;
+  static constexpr int kRequiredMinor = 16;
+
+  static bool computed = false;
+  static bool kernelHasMembarrier = false;
+
+  if (computed) {
+    return kernelHasMembarrier;
+  }
+
+  struct utsname uts;
+  int major, minor;
+  kernelHasMembarrier = uname(&uts) == 0 && strcmp(uts.sysname, "Linux") == 0 &&
+                        sscanf(uts.release, "%d.%d", &major, &minor) == 2 &&
+                        major >= kRequiredMajor &&
+                        (major != kRequiredMajor || minor >= kRequiredMinor);
+
+  // As a test bed, try to run the syscall with the command registering the
+  // intent to use the actual membarrier we'll want to carry out later.
+  //
+  // IMPORTANT: This is required or else running the membarrier later won't
+  // actually interrupt the threads in this process.
+  if (kernelHasMembarrier &&
+      membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0) != 0) {
+    kernelHasMembarrier = false;
+  }
+
+  computed = true;
+  return kernelHasMembarrier;
+#  else
+  // On other platforms, we assume that the syscall for flushing the icache
+  // will flush the execution context for other cores.
+  return true;
+#  endif
+}
+
+void FlushExecutionContextForAllThreads() {
+  // Callers must check that this operation is available.
+  MOZ_RELEASE_ASSERT(CanFlushExecutionContextForAllThreads());
+
+#  if defined(JS_SIMULATOR_ARM64) && defined(JS_CACHE_SIMULATOR_ARM64)
+  // Emulate what the real hardware would do by emitting a membarrier that'll
+  // interrupt and flush the execution context of all threads.
+  using js::jit::SimulatorProcess;
+  js::jit::AutoLockSimulatorCache alsc;
+  SimulatorProcess::membarrier();
+#  elif (LINUX_HAS_MEMBARRIER || defined(__android__))
+  // The caller has checked this can be performed, which will have registered
+  // this process to receive the membarrier. See above.
+  //
+  // membarrier will trigger an inter-processor-interrupt on any active threads
+  // of this process. This is an execution context synchronization event
+  // equivalent to running an `isb` instruction.
+  if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0) != 0) {
+    // Better safe than sorry.
+    MOZ_CRASH("membarrier can't be executed");
+  }
+#  else
+  // On other platforms, we assume that the syscall for flushing the icache
+  // will flush the execution context for other cores.
+#  endif
+}
+
+}  // namespace jit
+}  // namespace js
+
+#endif
diff --git a/js/src/jit/FlushICache.h b/js/src/jit/FlushICache.h
index 6c780e43e8665..6ef08c63d3acd 100644
--- a/js/src/jit/FlushICache.h
+++ b/js/src/jit/FlushICache.h
@@ -18,8 +18,7 @@ namespace jit {
 
 #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
 
-inline void FlushICache(void* code, size_t size,
-                        bool codeIsThreadLocal = true) {
+inline void FlushICache(void* code, size_t size) {
   // No-op. Code and data caches are coherent on x86 and x64.
 }
 
@@ -27,14 +26,15 @@ inline void FlushICache(void* code, size_t size,
     (defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)) || \
     defined(JS_CODEGEN_LOONG64)
 
-extern void FlushICache(void* code, size_t size, bool codeIsThreadLocal = true);
+// Invalidate the given code range from the icache. This will also flush the
+// execution context for this core. If this code is to be executed on another
+// thread, that thread must perform an execution context flush first using
+// `FlushExecutionContext` below.
+extern void FlushICache(void* code, size_t size);
 
 #elif defined(JS_CODEGEN_NONE)
 
-inline void FlushICache(void* code, size_t size,
-                        bool codeIsThreadLocal = true) {
-  MOZ_CRASH();
-}
+inline void FlushICache(void* code, size_t size) { MOZ_CRASH(); }
 
 #else
 #  error "Unknown architecture!"
@@ -47,10 +47,16 @@ inline void FlushICache(void* code, size_t size,
 inline void FlushExecutionContext() {
   // No-op. Execution context is coherent with instruction cache.
 }
+inline bool CanFlushExecutionContextForAllThreads() { return true; }
+inline void FlushExecutionContextForAllThreads() {
+  // No-op. Execution context is coherent with instruction cache.
+}
 
 #elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32)
 
 inline void FlushExecutionContext() { MOZ_CRASH(); }
+inline bool CanFlushExecutionContextForAllThreads() { MOZ_CRASH(); }
+inline void FlushExecutionContextForAllThreads() { MOZ_CRASH(); }
 
 #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
 
@@ -63,6 +69,19 @@ inline void FlushExecutionContext() { MOZ_CRASH(); }
 // this method.
 extern void FlushExecutionContext();
 
+// Some platforms can flush the execution context for other threads using a
+// syscall. This is required when JIT'ed code will be published to multiple
+// threads without a synchronization point where a `FlushExecutionContext`
+// could be inserted.
+extern bool CanFlushExecutionContextForAllThreads();
+
+// Flushes the execution context of all threads in this process, equivalent to
+// running `FlushExecutionContext` on every thread.
+//
+// Callers must ensure `CanFlushExecutionContextForAllThreads` is true, or
+// else this will crash.
+extern void FlushExecutionContextForAllThreads();
+
 #else
 #  error "Unknown architecture!"
 #endif
diff --git a/js/src/jit/ProcessExecutableMemory.cpp b/js/src/jit/ProcessExecutableMemory.cpp
index 5ea4b2e4ca5b4..2085a4802c232 100644
--- a/js/src/jit/ProcessExecutableMemory.cpp
+++ b/js/src/jit/ProcessExecutableMemory.cpp
@@ -749,11 +749,9 @@ bool js::jit::ReprotectRegion(void* start, size_t size,
                               ProtectionSetting protection,
                               MustFlushICache flushICache) {
   // Flush ICache when making code executable, before we modify |size|.
-  if (flushICache == MustFlushICache::LocalThreadOnly ||
-      flushICache == MustFlushICache::AllThreads) {
+  if (flushICache == MustFlushICache::Yes) {
     MOZ_ASSERT(protection == ProtectionSetting::Executable);
-    bool codeIsThreadLocal = flushICache == MustFlushICache::LocalThreadOnly;
-    jit::FlushICache(start, size, codeIsThreadLocal);
+    jit::FlushICache(start, size);
   }
 
   // Calculate the start of the page containing this region,
diff --git a/js/src/jit/ProcessExecutableMemory.h b/js/src/jit/ProcessExecutableMemory.h
index de2109f5310cc..51747634f38ee 100644
--- a/js/src/jit/ProcessExecutableMemory.h
+++ b/js/src/jit/ProcessExecutableMemory.h
@@ -68,15 +68,9 @@ enum class ProtectionSetting {
   Executable,
 };
 
-/// Whether the instruction cache must be flushed:
-//- No means no flushing will happen.
-//- LocalThreadOnly means only the local thread's icache will be flushed.
-//- AllThreads means all the threads' icaches will be flushed; this must be used
-// when the compiling thread and the executing thread might be different.
+/// Whether the instruction cache must be flushed
 
-enum class MustFlushICache { No, LocalThreadOnly, AllThreads };
-
-enum class FlushICacheSpec { LocalThreadOnly, AllThreads };
+enum class MustFlushICache { No, Yes };
 
 [[nodiscard]] extern bool ReprotectRegion(void* start, size_t size,
                                           ProtectionSetting protection,
diff --git a/js/src/jit/arm/Architecture-arm.cpp b/js/src/jit/arm/Architecture-arm.cpp
index 2491c67350146..687c45f8f7373 100644
--- a/js/src/jit/arm/Architecture-arm.cpp
+++ b/js/src/jit/arm/Architecture-arm.cpp
@@ -481,7 +481,7 @@ uint32_t FloatRegisters::ActualTotalPhys() {
   return 16;
 }
 
-void FlushICache(void* code, size_t size, bool codeIsThreadLocal) {
+void FlushICache(void* code, size_t size) {
 #if defined(JS_SIMULATOR_ARM)
   js::jit::SimulatorProcess::FlushICache(code, size);
 
diff --git a/js/src/jit/arm64/Architecture-arm64.cpp b/js/src/jit/arm64/Architecture-arm64.cpp
index f95c0231d84d3..eb3dd67b1a9b8 100644
--- a/js/src/jit/arm64/Architecture-arm64.cpp
+++ b/js/src/jit/arm64/Architecture-arm64.cpp
@@ -119,12 +119,8 @@ uint32_t GetARM64Flags() { return 0; }
 // computed".
 bool CPUFlagsHaveBeenComputed() { return true; }
 
-void FlushICache(void* code, size_t size, bool codeIsThreadLocal) {
-  vixl::CPU::EnsureIAndDCacheCoherency(code, size, codeIsThreadLocal);
-}
-
-bool CanFlushICacheFromBackgroundThreads() {
-  return vixl::CPU::CanFlushICacheFromBackgroundThreads();
+void FlushICache(void* code, size_t size) {
+  vixl::CPU::EnsureIAndDCacheCoherency(code, size);
 }
 
 void FlushExecutionContext() { vixl::CPU::FlushExecutionContext(); }
diff --git a/js/src/jit/arm64/vixl/Cpu-vixl.h b/js/src/jit/arm64/vixl/Cpu-vixl.h
index ac709bccbf2a6..4db51aad6b473 100644
--- a/js/src/jit/arm64/vixl/Cpu-vixl.h
+++ b/js/src/jit/arm64/vixl/Cpu-vixl.h
@@ -165,11 +165,7 @@ class CPU {
   // the I and D caches. I and D caches are not automatically coherent on ARM
   // so this operation is required before any dynamically generated code can
   // safely run.
-  static void EnsureIAndDCacheCoherency(void *address, size_t length, bool codeIsThreadLocal);
-
-  // Returns true when the current machine supports flushing the instruction
-  // cache on a background thread.
-  static bool CanFlushICacheFromBackgroundThreads();
+  static void EnsureIAndDCacheCoherency(void* address, size_t length);
 
   // Flush the local instruction pipeline, forcing a reload of any instructions
   // beyond this barrier from the icache.
diff --git a/js/src/jit/arm64/vixl/MozCpu-vixl.cpp b/js/src/jit/arm64/vixl/MozCpu-vixl.cpp
index ad96098501679..909cc590aeb78 100644
--- a/js/src/jit/arm64/vixl/MozCpu-vixl.cpp
+++ b/js/src/jit/arm64/vixl/MozCpu-vixl.cpp
@@ -33,40 +33,8 @@
 #  include <libkern/OSCacheControl.h>
 #endif
 
-#if defined(__aarch64__) && (defined(__linux__) || defined(__android__))
-#   if defined(__linux__)
-#    include <linux/membarrier.h>
-#    include <sys/syscall.h>
-#    include <sys/utsname.h>
-#    include <unistd.h>
-#   elif defined(__ANDROID__)
-#    include <sys/syscall.h>
-#    include <unistd.h>
-#   else
-#    error "Missing platform-specific declarations for membarrier syscall!"
-#   endif // __linux__ / ANDROID
-
-#  include "vm/JSContext.h" // TlsContext
-
-static int membarrier(int cmd, int flags) {
-    return syscall(__NR_membarrier, cmd, flags);
-}
-
-// These definitions come from the Linux kernel source, for kernels before 4.16
-// which didn't have access to these membarrier commands.
-#  ifndef MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
-#  define MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE (1 << 5)
-#  endif
-
-#  ifndef MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE
-#  define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE (1 << 6)
-#  endif
-
-#endif // __aarch64__
-
 namespace vixl {
 
-
 // Currently computes I and D cache line size.
 void CPU::SetUp() {
   uint32_t cache_type_register = GetCacheType();
@@ -115,45 +83,7 @@ uint32_t CPU::GetCacheType() {
 #endif
 }
 
-bool CPU::CanFlushICacheFromBackgroundThreads() {
-#if defined(__aarch64__) && (defined(__linux__) || defined(__android__))
-  // On linux, check the kernel supports membarrier(2), that is, it's a kernel
-  // above Linux 4.16 included.
-  //
-  // Note: this code has been extracted (August 2020) from
-  // https://android.googlesource.com/platform/art/+/58520dfba31d6eeef75f5babff15e09aa28e5db8/libartbase/base/membarrier.cc#50
-  static constexpr int kRequiredMajor = 4;
-  static constexpr int kRequiredMinor = 16;
-
-  static bool computed = false;
-  static bool kernelHasMembarrier = false;
-
-  if (!computed) {
-    struct utsname uts;
-    int major, minor;
-    kernelHasMembarrier = uname(&uts) == 0 &&
-        strcmp(uts.sysname, "Linux") == 0 &&
-        sscanf(uts.release, "%d.%d", &major, &minor) == 2 &&
-        major >= kRequiredMajor && (major != kRequiredMajor || minor >= kRequiredMinor);
-
-    // As a test bed, try to run the syscall with the command registering the
-    // intent to use the actual membarrier we'll want to carry out later.
-    if (kernelHasMembarrier &&
-        membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0) != 0) {
-      kernelHasMembarrier = false;
-    }
-
-    computed = true;
-  }
-
-  return kernelHasMembarrier;
-#else
-  // On other platforms, we assume that the provided syscall does the right thing.
-  return true;
-#endif
-}
-
-void CPU::EnsureIAndDCacheCoherency(void *address, size_t length, bool codeIsThreadLocal) {
+void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) {
 #if defined(JS_SIMULATOR_ARM64) && defined(JS_CACHE_SIMULATOR_ARM64)
   // This code attempts to emulate what the following assembly sequence is
   // doing, which is sending the information to all cores that some cache line
@@ -175,11 +105,6 @@ void CPU::EnsureIAndDCacheCoherency(void *address, size_t length, bool codeIsThr
   Simulator* sim = vixl::Simulator::Current();
   if (sim) {
     sim->FlushICache();
-  } else if (!codeIsThreadLocal) {
-    // We're on a background thread; emulate what the real hardware would do by
-    // emitting a membarrier that'll interrupt and cause an icache invalidation
-    // on all the threads.
-    SimulatorProcess::membarrier();
   }
 #elif defined(_MSC_VER) && defined(_M_ARM64)
   FlushInstructionCache(GetCurrentProcess(), address, length);
@@ -262,31 +187,18 @@ void CPU::EnsureIAndDCacheCoherency(void *address, size_t length, bool codeIsThr
     iline += isize;
   } while (iline < end);
 
-  __asm__ __volatile__ (
-    // Make sure that the instruction cache operations (above) take effect
-    // before the isb (below).
-    "   dsb  ish\n"
-
-    // Ensure that any instructions already in the pipeline are discarded and
-    // reloaded from the new data.
-    // isb : Instruction Synchronisation Barrier
-    "   isb\n"
-    : : : "memory");
+  __asm__ __volatile__(
+      // Make sure that the instruction cache operations (above) take effect
+      // before the isb (below).
+      "   dsb  ish\n"
 
-  if (!codeIsThreadLocal) {
-    // If we're on a background thread, emit a membarrier that will synchronize
-    // all the executing threads with the new version of the code.
-    JSContext* cx = js::TlsContext.get();
-    if (!cx || !cx->isMainThreadContext()) {
-      MOZ_RELEASE_ASSERT(CPU::CanFlushICacheFromBackgroundThreads());
-      // The intent to use this command has been carried over in
-      // CanFlushICacheFromBackgroundThreads.
-      if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0) != 0) {
-        // Better safe than sorry.
-        MOZ_CRASH("membarrier can't be executed");
-      }
-    }
-  }
+      // Ensure that any instructions already in the pipeline are discarded and
+      // reloaded from the new data.
+      // isb : Instruction Synchronisation Barrier
+      "   isb\n"
+      :
+      :
+      : "memory");
 #else
   // If the host isn't AArch64, we must be using the simulator, so this function
   // doesn't have to do anything.
diff --git a/js/src/jit/loong64/Architecture-loong64.cpp b/js/src/jit/loong64/Architecture-loong64.cpp
index d838eac16b8f4..6b1069a592ac2 100644
--- a/js/src/jit/loong64/Architecture-loong64.cpp
+++ b/js/src/jit/loong64/Architecture-loong64.cpp
@@ -68,7 +68,7 @@ bool CPUFlagsHaveBeenComputed() {
 
 uint32_t GetLOONG64Flags() { return 0; }
 
-void FlushICache(void* code, size_t size, bool codeIsThreadLocal) {
+void FlushICache(void* code, size_t size) {
 #if defined(JS_SIMULATOR)
   js::jit::SimulatorProcess::FlushICache(code, size);
 
diff --git a/js/src/jit/mips-shared/Architecture-mips-shared.cpp b/js/src/jit/mips-shared/Architecture-mips-shared.cpp
index ed56ed72502e0..e3017adb4f70b 100644
--- a/js/src/jit/mips-shared/Architecture-mips-shared.cpp
+++ b/js/src/jit/mips-shared/Architecture-mips-shared.cpp
@@ -87,7 +87,7 @@ Registers::Code Registers::FromName(const char* name) {
   return Invalid;
 }
 
-void FlushICache(void* code, size_t size, bool codeIsThreadLocal) {
+void FlushICache(void* code, size_t size) {
 #if defined(JS_SIMULATOR)
   js::jit::SimulatorProcess::FlushICache(code, size);
 
diff --git a/js/src/jit/moz.build b/js/src/jit/moz.build
index 69af98f953beb..4f2765c060227 100644
--- a/js/src/jit/moz.build
+++ b/js/src/jit/moz.build
@@ -37,6 +37,7 @@ UNIFIED_SOURCES += [
     "EdgeCaseAnalysis.cpp",
     "EffectiveAddressAnalysis.cpp",
     "ExecutableAllocator.cpp",
+    "FlushICache.cpp",
     "FoldLinearArithConstants.cpp",
     "InlinableNatives.cpp",
     "InstructionReordering.cpp",
diff --git a/js/src/jsapi-tests/testsJit.cpp b/js/src/jsapi-tests/testsJit.cpp
index ac2c1c7a3cbb8..29de274004862 100644
--- a/js/src/jsapi-tests/testsJit.cpp
+++ b/js/src/jsapi-tests/testsJit.cpp
@@ -68,8 +68,8 @@ bool ExecuteJit(JSContext* cx, js::jit::MacroAssembler& masm) {
   if (!code) {
     return false;
   }
-  if (!ExecutableAllocator::makeExecutableAndFlushICache(
-          FlushICacheSpec::LocalThreadOnly, code->raw(), code->bufferSize())) {
+  if (!ExecutableAllocator::makeExecutableAndFlushICache(code->raw(),
+                                                         code->bufferSize())) {
     return false;
   }
 
diff --git a/js/src/wasm/WasmBuiltins.cpp b/js/src/wasm/WasmBuiltins.cpp
index f1f49937a3d97..8f53d5d1327fb 100644
--- a/js/src/wasm/WasmBuiltins.cpp
+++ b/js/src/wasm/WasmBuiltins.cpp
@@ -1756,9 +1756,8 @@ bool wasm::EnsureBuiltinThunksInitialized() {
   MOZ_ASSERT(masm.trapSites().empty());
   MOZ_ASSERT(masm.tryNotes().empty());
 
-  if (!ExecutableAllocator::makeExecutableAndFlushICache(
-          FlushICacheSpec::LocalThreadOnly, thunks->codeBase,
-          thunks->codeSize)) {
+  if (!ExecutableAllocator::makeExecutableAndFlushICache(thunks->codeBase,
+                                                         thunks->codeSize)) {
     return false;
   }
 
diff --git a/js/src/wasm/WasmCode.cpp b/js/src/wasm/WasmCode.cpp
index 34df415bef868..1828037807f31 100644
--- a/js/src/wasm/WasmCode.cpp
+++ b/js/src/wasm/WasmCode.cpp
@@ -331,7 +331,7 @@ UniqueModuleSegment ModuleSegment::create(Tier tier, const Bytes& unlinkedBytes,
                                        linkData);
 }
 
-bool ModuleSegment::initialize(IsTier2 isTier2, const CodeTier& codeTier,
+bool ModuleSegment::initialize(const CodeTier& codeTier,
                                const LinkData& linkData,
                                const Metadata& metadata,
                                const MetadataTier& metadataTier) {
@@ -341,13 +341,9 @@ bool ModuleSegment::initialize(IsTier2 isTier2, const CodeTier& codeTier,
 
   // Optimized compilation finishes on a background thread, so we must make sure
   // to flush the icaches of all the executing threads.
-  FlushICacheSpec flushIcacheSpec = isTier2 == IsTier2::Tier2
-                                        ? FlushICacheSpec::AllThreads
-                                        : FlushICacheSpec::LocalThreadOnly;
-
   // Reprotect the whole region to avoid having separate RW and RX mappings.
   if (!ExecutableAllocator::makeExecutableAndFlushICache(
-          flushIcacheSpec, base(), RoundupCodeLength(length()))) {
+          base(), RoundupCodeLength(length()))) {
     return false;
   }
 
@@ -499,7 +495,6 @@ static constexpr unsigned LAZY_STUB_LIFO_DEFAULT_CHUNK_SIZE = 8 * 1024;
 
 bool LazyStubTier::createManyEntryStubs(const Uint32Vector& funcExportIndices,
                                         const CodeTier& codeTier,
-                                        bool flushAllThreadsIcaches,
                                         size_t* stubSegmentIndex) {
   MOZ_ASSERT(funcExportIndices.length());
 
@@ -579,13 +574,7 @@ bool LazyStubTier::createManyEntryStubs(const Uint32Vector& funcExportIndices,
     Assembler::Bind(codePtr, label);
   }
 
-  // Optimized compilation finishes on a background thread, so we must make sure
-  // to flush the icaches of all the executing threads.
-  FlushICacheSpec flushIcacheSpec = flushAllThreadsIcaches
-                                        ? FlushICacheSpec::AllThreads
-                                        : FlushICacheSpec::LocalThreadOnly;
-  if (!ExecutableAllocator::makeExecutableAndFlushICache(flushIcacheSpec,
-                                                         codePtr, codeLength)) {
+  if (!ExecutableAllocator::makeExecutableAndFlushICache(codePtr, codeLength)) {
     return false;
   }
 
@@ -629,14 +618,8 @@ bool LazyStubTier::createOneEntryStub(uint32_t funcExportIndex,
     return false;
   }
 
-  // This happens on the executing thread (when createOneEntryStub is called
-  // from GetInterpEntryAndEnsureStubs), so no need to flush the icaches on all
-  // the threads.
-  bool flushAllThreadIcaches = false;
-
   size_t stubSegmentIndex;
-  if (!createManyEntryStubs(funcExportIndexes, codeTier, flushAllThreadIcaches,
-                            &stubSegmentIndex)) {
+  if (!createManyEntryStubs(funcExportIndexes, codeTier, &stubSegmentIndex)) {
     return false;
   }
 
@@ -667,13 +650,8 @@ bool LazyStubTier::createTier2(const Uint32Vector& funcExportIndices,
     return true;
   }
 
-  // This compilation happens on a background compiler thread, so the icache may
-  // need to be flushed on all the threads.
-  bool flushAllThreadIcaches = true;
-
   size_t stubSegmentIndex;
-  if (!createManyEntryStubs(funcExportIndices, codeTier, flushAllThreadIcaches,
-                            &stubSegmentIndex)) {
+  if (!createManyEntryStubs(funcExportIndices, codeTier, &stubSegmentIndex)) {
     return false;
   }
 
@@ -849,15 +827,15 @@ bool Metadata::getFuncName(NameContext ctx, uint32_t funcIndex,
   return AppendFunctionIndexName(funcIndex, name);
 }
 
-bool CodeTier::initialize(IsTier2 isTier2, const Code& code,
-                          const LinkData& linkData, const Metadata& metadata) {
+bool CodeTier::initialize(const Code& code, const LinkData& linkData,
+                          const Metadata& metadata) {
   MOZ_ASSERT(!initialized());
   code_ = &code;
 
   MOZ_ASSERT(lazyStubs_.readLock()->entryStubsEmpty());
 
   // See comments in CodeSegment::initialize() for why this must be last.
-  if (!segment_->initialize(isTier2, *this, linkData, metadata, *metadata_)) {
+  if (!segment_->initialize(*this, linkData, metadata, *metadata_)) {
     return false;
   }
 
@@ -946,7 +924,7 @@ Code::Code(UniqueCodeTier tier1, const Metadata& metadata,
 bool Code::initialize(const LinkData& linkData) {
   MOZ_ASSERT(!initialized());
 
-  if (!tier1_->initialize(IsTier2::NotTier2, *this, linkData, *metadata_)) {
+  if (!tier1_->initialize(*this, linkData, *metadata_)) {
     return false;
   }
 
@@ -960,7 +938,7 @@ bool Code::setAndBorrowTier2(UniqueCodeTier tier2, const LinkData& linkData,
   MOZ_RELEASE_ASSERT(tier2->tier() == Tier::Optimized &&
                      tier1_->tier() == Tier::Baseline);
 
-  if (!tier2->initialize(IsTier2::Tier2, *this, linkData, *metadata_)) {
+  if (!tier2->initialize(*this, linkData, *metadata_)) {
     return false;
   }
 
diff --git a/js/src/wasm/WasmCode.h b/js/src/wasm/WasmCode.h
index ede89a0271378..3ef88c7e53a08 100644
--- a/js/src/wasm/WasmCode.h
+++ b/js/src/wasm/WasmCode.h
@@ -209,8 +209,6 @@ class CodeSegment {
 
 using UniqueModuleSegment = UniquePtr<ModuleSegment>;
 
-enum IsTier2 { Tier2, NotTier2 };
-
 class ModuleSegment : public CodeSegment {
   const Tier tier_;
   uint8_t* const trapCode_;
@@ -224,9 +222,8 @@ class ModuleSegment : public CodeSegment {
   static UniqueModuleSegment create(Tier tier, const Bytes& unlinkedBytes,
                                     const LinkData& linkData);
 
-  bool initialize(IsTier2 isTier2, const CodeTier& codeTier,
-                  const LinkData& linkData, const Metadata& metadata,
-                  const MetadataTier& metadataTier);
+  bool initialize(const CodeTier& codeTier, const LinkData& linkData,
+                  const Metadata& metadata, const MetadataTier& metadataTier);
 
   Tier tier() const { return tier_; }
 
@@ -591,7 +588,6 @@ class LazyStubTier {
 
   [[nodiscard]] bool createManyEntryStubs(const Uint32Vector& funcExportIndices,
                                           const CodeTier& codeTier,
-                                          bool flushAllThreadsIcaches,
                                           size_t* stubSegmentIndex);
 
  public:
@@ -654,7 +650,7 @@ class CodeTier {
         lazyStubs_(mutexForTier(segment_->tier())) {}
 
   bool initialized() const { return !!code_ && segment_->initialized(); }
-  bool initialize(IsTier2 isTier2, const Code& code, const LinkData& linkData,
+  bool initialize(const Code& code, const LinkData& linkData,
                   const Metadata& metadata);
 
   Tier tier() const { return segment_->tier(); }
diff --git a/js/src/wasm/WasmCompile.cpp b/js/src/wasm/WasmCompile.cpp
index 26534bca4ea47..842f75d07dda9 100644
--- a/js/src/wasm/WasmCompile.cpp
+++ b/js/src/wasm/WasmCompile.cpp
@@ -26,6 +26,7 @@
 #  include "jit/ProcessExecutableMemory.h"
 #endif
 
+#include "jit/FlushICache.h"
 #include "util/Text.h"
 #include "vm/HelperThreads.h"
 #include "vm/Realm.h"
@@ -572,6 +573,11 @@ static bool TieringBeneficial(uint32_t codeSize) {
   return true;
 }
 
+// Ensure that we have the non-compiler requirements to tier safely.
+static bool PlatformCanTier() {
+  return CanUseExtraThreads() && jit::CanFlushExecutionContextForAllThreads();
+}
+
 CompilerEnvironment::CompilerEnvironment(const CompileArgs& args)
     : state_(InitialWithArgs), args_(&args) {}
 
@@ -590,20 +596,6 @@ void CompilerEnvironment::computeParameters() {
   state_ = Computed;
 }
 
-// Check that this architecture either:
-// - is cache-coherent, which is the case for most tier-1 architectures we care
-// about.
-// - or has the ability to invalidate the instruction cache of all threads, so
-// background compilation in tiered compilation can be synchronized across all
-// threads.
-static bool IsICacheSafe() {
-#ifdef JS_CODEGEN_ARM64
-  return jit::CanFlushICacheFromBackgroundThreads();
-#else
-  return true;
-#endif
-}
-
 void CompilerEnvironment::computeParameters(Decoder& d) {
   MOZ_ASSERT(!isComputed());
 
@@ -633,8 +625,9 @@ void CompilerEnvironment::computeParameters(Decoder& d) {
     codeSectionSize = range.size;
   }
 
-  if (baselineEnabled && hasSecondTier && CanUseExtraThreads() &&
-      (TieringBeneficial(codeSectionSize) || forceTiering) && IsICacheSafe()) {
+  if (baselineEnabled && hasSecondTier &&
+      (TieringBeneficial(codeSectionSize) || forceTiering) &&
+      PlatformCanTier()) {
     mode_ = CompileMode::Tier1;
     tier_ = Tier::Baseline;
   } else {
diff --git a/js/src/wasm/WasmModule.cpp b/js/src/wasm/WasmModule.cpp
index b3bb41cdaf400..406fb12dc3242 100644
--- a/js/src/wasm/WasmModule.cpp
+++ b/js/src/wasm/WasmModule.cpp
@@ -20,7 +20,8 @@
 
 #include <chrono>
 
-#include "js/BuildId.h"                 // JS::BuildIdCharVector
+#include "jit/FlushICache.h"  // for FlushExecutionContextForAllThreads
+#include "js/BuildId.h"       // JS::BuildIdCharVector
 #include "js/experimental/TypedData.h"  // JS_NewUint8Array
 #include "js/friend/ErrorMessages.h"    // js::GetErrorMessage, JSMSG_*
 #include "js/Printf.h"                  // JS_smprintf
@@ -212,6 +213,14 @@ bool Module::finishTier2(const LinkData& linkData2,
       return false;
     }
 
+    // Initializing the code above will have flushed the icache for all cores.
+    // However, there could still be stale data in the execution pipeline of
+    // other cores on some platforms. Force an execution context flush on all
+    // threads to fix this before we commit the code.
+    //
+    // This is safe due to the check in `PlatformCanTier` in WasmCompile.cpp.
+    jit::FlushExecutionContextForAllThreads();
+
     // Now that we can't fail or otherwise abort tier2, make it live.
 
     MOZ_ASSERT(!code().hasTier2());

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the tbb-commits mailing list