pci_handler: Use volatile

While revisiting this code, we noticed that the memory-mapped region is
accessed through non-volatile pointers. We should access it through
volatile pointers to be safe.

As a best practice, we should copy byte by byte in a loop, for both
reads and writes, instead of relying on memcpy with volatile memory.

Signed-off-by: Brandon Kim <brandonkim@google.com>
Change-Id: I7b10a87836378811557dfde5c505cb2cfacb771d
diff --git a/src/pci_handler.cpp b/src/pci_handler.cpp
index 0936032..39ddf77 100644
--- a/src/pci_handler.cpp
+++ b/src/pci_handler.cpp
@@ -41,7 +41,17 @@
         (offset + length < regionSize) ? length : regionSize - offset;
     std::vector<uint8_t> results(finalLength);
 
-    std::memcpy(results.data(), mmap.get().data() + offset, finalLength);
+    // Use a volatile pointer to ensure every access reads directly from the
+    // memory-mapped region, preventing compiler optimizations like caching.
+    const volatile uint8_t* src =
+        reinterpret_cast<volatile const uint8_t*>(mmap.get().data() + offset);
+
+    // Perform a byte-by-byte copy to avoid undefined behavior with memcpy on
+    // volatile memory.
+    for (uint32_t i = 0; i < finalLength; ++i)
+    {
+        results[i] = src[i];
+    }
     return results;
 }
 
@@ -61,7 +71,16 @@
     // Write up to regionSize in case the offset + length overflowed
     uint16_t finalLength =
         (offset + length < regionSize) ? length : regionSize - offset;
-    std::memcpy(mmap.get().data() + offset, bytes.data(), finalLength);
+    // Use a volatile pointer to ensure every access writes directly to the
+    // memory-mapped region.
+    volatile uint8_t* dest =
+        reinterpret_cast<volatile uint8_t*>(mmap.get().data() + offset);
+
+    // Perform a byte-by-byte copy to ensure volatile semantics.
+    for (uint16_t i = 0; i < finalLength; ++i)
+    {
+        dest[i] = bytes[i];
+    }
     return finalLength;
 }