# --- T2-COPYRIGHT-BEGIN ---
# t2/package/*/rocr-runtime/hotfix-x86-mm.patch
# Copyright (C) 2025 The T2 SDE Project
# SPDX-License-Identifier: GPL-2.0 or patched project license
# --- T2-COPYRIGHT-END ---

# The SSE intrinsics _mm_sfence() and _mm_pause() only exist on x86;
# guard them with architecture checks so the runtime also compiles on
# the other architectures T2 targets.

--- ROCR-Runtime-rocm-6.3.3/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp.vanilla	2025-03-23 18:08:59.706285598 +0100
+++ ROCR-Runtime-rocm-6.3.3/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp	2025-03-23 18:12:17.993368666 +0100
@@ -1551,7 +1551,9 @@
   memcpy(&queue_slot[1], &slot_data[1], slot_size_b - sizeof(uint32_t));
   if (core::Runtime::runtime_singleton_->flag().dev_mem_queue() && !agent_->is_xgmi_cpu_gpu()) {
     // Ensure the packet body is written as header may get reordered when writing over PCIE
+#if defined(__i386__) || defined(__x86_64__)
     _mm_sfence();
+#endif
   }
   atomic::Store(&queue_slot[0], slot_data[0], std::memory_order_release);
 
--- ROCR-Runtime-rocm-6.3.3/runtime/hsa-runtime/core/runtime/intercept_queue.cpp.vanilla	2025-03-23 18:10:39.496495157 +0100
+++ ROCR-Runtime-rocm-6.3.3/runtime/hsa-runtime/core/runtime/intercept_queue.cpp	2025-03-23 18:12:50.363436643 +0100
@@ -258,7 +258,9 @@
   ring[barrier & mask].barrier_and.completion_signal = Signal::Convert(async_doorbell_);
   if (Runtime::runtime_singleton_->flag().dev_mem_queue() && !needsPcieOrdering()) {
     // Ensure the packet body is written as header may get reordered when writing over PCIE
+#if defined(__i386__) || defined(__x86_64__)
     _mm_sfence();
+#endif
   }
   atomic::Store(&ring[barrier & mask].barrier_and.header, kBarrierHeader,
                 std::memory_order_release);
@@ -305,7 +307,9 @@
   if (write_index != 0) {
     if (Runtime::runtime_singleton_->flag().dev_mem_queue() && !needsPcieOrdering()) {
       // Ensure the packet body is written as header may get reordered when writing over PCIE
+#if defined(__i386__) || defined(__x86_64__)
       _mm_sfence();
+#endif
     }
     atomic::Store(&ring[write & mask].packet.header,
                   packets[first_written_packet_index].packet.header, std::memory_order_release);
@@ -374,7 +378,9 @@
     handler.first(&ring[i & mask], 1, i, handler.second, PacketWriter);
     if (Runtime::runtime_singleton_->flag().dev_mem_queue() && !needsPcieOrdering()) {
       // Ensure the packet body is written as header may get reordered when writing over PCIE
+#if defined(__i386__) || defined(__x86_64__)
       _mm_sfence();
+#endif
     }
     // Invalidate consumed packet.
     atomic::Store(&ring[i & mask].packet.header, kInvalidHeader, std::memory_order_release);
--- ROCR-Runtime-rocm-6.3.3/runtime/hsa-runtime/core/runtime/amd_blit_kernel.cpp.vanilla	2025-03-23 18:11:30.663269273 +0100
+++ ROCR-Runtime-rocm-6.3.3/runtime/hsa-runtime/core/runtime/amd_blit_kernel.cpp	2025-03-23 18:12:22.746711981 +0100
@@ -1274,7 +1274,9 @@
   std::atomic_thread_fence(std::memory_order_release);
   if (core::Runtime::runtime_singleton_->flag().dev_mem_queue() && !queue_->needsPcieOrdering()) {
     // Ensure the packet body is written as header may get reordered when writing over PCIE
+#if defined(__i386__) || defined(__x86_64__)
     _mm_sfence();
+#endif
   }
   queue_buffer[index & queue_bitmask_].header = kDispatchPacketHeader;
 
--- ROCR-Runtime-rocm-6.3.3/runtime/hsa-runtime/core/util/locks.h.vanilla	2025-03-23 18:07:24.269418515 +0100
+++ ROCR-Runtime-rocm-6.3.3/runtime/hsa-runtime/core/util/locks.h	2025-03-23 18:14:12.943610061 +0100
@@ -72,7 +72,9 @@
   while (!lock_.compare_exchange_strong(old, 1)) {
     cnt--;
     if (cnt > maxSpinIterPause) {
+#if defined(__i386__) || defined(__x86_64__)
       _mm_pause();
+#endif
     } else if (cnt-- > maxSpinIterYield) {
       os::YieldThread();
     } else {
--- ROCR-Runtime-rocm-6.3.3/runtime/hsa-runtime/core/util/atomic_helpers.h.vanilla	2025-03-23 18:11:30.813269588 +0100
+++ ROCR-Runtime-rocm-6.3.3/runtime/hsa-runtime/core/util/atomic_helpers.h	2025-03-23 18:13:38.396870846 +0100
@@ -98,7 +98,9 @@
     case std::memory_order_release:
     case std::memory_order_seq_cst:
     case std::memory_order_acq_rel:
+#if defined(__i386__) || defined(__x86_64__)
      _mm_sfence();
+#endif
     default:;
   }
 #endif
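
For reference, the whole change reduces to the pattern sketched below. This is an
illustrative sketch only, not ROCR-Runtime code: the helper names store_fence() and
cpu_pause() are hypothetical. Note that the patch itself installs no fallback fence
on non-x86 targets, since the adjacent atomic::Store(..., std::memory_order_release)
calls already provide release ordering there; a standalone portable helper would look
roughly like this:

#include <atomic>
#if defined(__i386__) || defined(__x86_64__)
#include <immintrin.h>  // provides _mm_sfence() and _mm_pause()
#endif

// Hypothetical helper: order all prior stores before a later header/flag store.
static inline void store_fence() {
#if defined(__i386__) || defined(__x86_64__)
  _mm_sfence();                                         // x86 SSE store fence
#else
  std::atomic_thread_fence(std::memory_order_release);  // portable equivalent
#endif
}

// Hypothetical helper: spin-wait hint; a no-op where _mm_pause() does not exist,
// which is exactly what the locks.h hunk does.
static inline void cpu_pause() {
#if defined(__i386__) || defined(__x86_64__)
  _mm_pause();
#endif
}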