Skip to content

Commit

Permalink
Merge pull request #550 from howjmay/fix-prefetch
Browse files Browse the repository at this point in the history
fix: Align _mm_prefetch behavior to document
  • Loading branch information
jserv authored Nov 2, 2022
2 parents a302995 + 4cd4383 commit a387dd5
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 15 deletions.
37 changes: 23 additions & 14 deletions sse2neon.h
Original file line number Diff line number Diff line change
Expand Up @@ -617,16 +617,12 @@ FORCE_INLINE uint16_t _sse2neon_vaddvq_u16(uint16x8_t a)
+------+------+------+------+------+------+-------------+
*/

/* Constants for use with _mm_prefetch. */
/* Constants for use with _mm_prefetch. */
/* _mm_prefetch switches on NTA, T0, T1 and T2 and passes __builtin_prefetch a
 * locality argument of 0, 3, 2 and 1 respectively. */
enum _mm_hint {
_MM_HINT_NTA = 0, /* load data to L1 and L2 cache, mark it as NTA */
_MM_HINT_T0 = 1, /* load data to L1 and L2 cache */
_MM_HINT_T1 = 2, /* load data to L2 cache only */
_MM_HINT_T2 = 3, /* load data to L2 cache only, mark it as NTA */
_MM_HINT_ENTA = 4, /* exclusive version of _MM_HINT_NTA */
_MM_HINT_ET0 = 5, /* exclusive version of _MM_HINT_T0 */
_MM_HINT_ET1 = 6, /* exclusive version of _MM_HINT_T1 */
_MM_HINT_ET2 = 7 /* exclusive version of _MM_HINT_T2 */
_MM_HINT_NTA = 0, /* load data to L1 and L2 cache, mark it as NTA */
_MM_HINT_T0 = 1, /* load data to L1 and L2 cache */
_MM_HINT_T1 = 2, /* load data to L2 cache only */
_MM_HINT_T2 = 3, /* load data to L2 cache only, mark it as NTA */
};

// The bit field mapping to the FPCR(floating-point control register)
Expand Down Expand Up @@ -2354,12 +2350,25 @@ FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b)
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmulhuw
#define _m_pmulhuw(a, b) _mm_mulhi_pu16(a, b)

// Loads one cache line of data from address p to a location closer to the
// processor. https://msdn.microsoft.com/en-us/library/84szxsww(v=vs.100).aspx
FORCE_INLINE void _mm_prefetch(const void *p, int i)
// Fetch the line of data from memory that contains address p to a location in
// the cache hierarchy specified by the locality hint i.
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_prefetch
FORCE_INLINE void _mm_prefetch(char const *p, int i)
{
(void) i;
__builtin_prefetch(p);
// Translate the hint i into the (rw, locality) arguments of
// __builtin_prefetch. rw is 0 in every case (prefetch for a read). Per the
// GCC documentation, locality ranges from 0 (no temporal locality) to 3
// (high temporal locality, keep in all cache levels).
// The switch has no default: a hint that matches none of the cases passes
// through it without issuing a prefetch.
switch (i) {
case _MM_HINT_NTA:
// Non-temporal hint -> lowest locality.
__builtin_prefetch(p, 0, 0);
break;
case _MM_HINT_T0:
// T0 -> highest locality.
__builtin_prefetch(p, 0, 3);
break;
case _MM_HINT_T1:
__builtin_prefetch(p, 0, 2);
break;
case _MM_HINT_T2:
__builtin_prefetch(p, 0, 1);
break;
}
}

// Compute the absolute differences of packed unsigned 8-bit integers in a and
Expand Down
48 changes: 47 additions & 1 deletion tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2493,7 +2493,53 @@ result_t test_m_pmulhuw(const SSE2NEONTestImpl &impl, uint32_t iter)

// Smoke test for _mm_prefetch: calls it with each of the four locality hints
// (_MM_HINT_T0, _MM_HINT_T1, _MM_HINT_T2, _MM_HINT_NTA) on the address of every
// test vector. No values are compared after the calls, so the test only shows
// that the calls build and run. The parameters impl and iter are not
// referenced in the body.
result_t test_mm_prefetch(const SSE2NEONTestImpl &impl, uint32_t iter)
{
return TEST_UNIMPL;
typedef struct {
__m128 a;   // vector whose address is handed to _mm_prefetch
float r[4]; // companion float values; never read in this function
} prefetch_test_t;
// In each row, r lists the _mm_set_ps arguments in reverse order (see the
// note on the sixth row for the one exception).
prefetch_test_t test_vec[8] = {
{
_mm_set_ps(-0.1f, 0.2f, 0.3f, 0.4f),
{0.4f, 0.3f, 0.2f, -0.1f},
},
{
_mm_set_ps(0.5f, 0.6f, -0.7f, -0.8f),
{-0.8f, -0.7f, 0.6f, 0.5f},
},
{
_mm_set_ps(0.9f, 0.10f, -0.11f, 0.12f),
{0.12f, -0.11f, 0.10f, 0.9f},
},
{
_mm_set_ps(-1.1f, -2.1f, -3.1f, -4.1f),
{-4.1f, -3.1f, -2.1f, -1.1f},
},
{
_mm_set_ps(100.0f, -110.0f, 120.0f, -130.0f),
{-130.0f, 120.0f, -110.0f, 100.0f},
},
{
_mm_set_ps(200.5f, 210.5f, -220.5f, 230.5f),
// NOTE(review): unlike every other row, these are not the reversed
// _mm_set_ps arguments (230.5f, -220.5f, 210.5f, 200.5f). Harmless while r
// is unused, but confirm whether this row is intentional.
{995.74f, -93.04f, 144.03f, 902.50f},
},
{
_mm_set_ps(10.11f, -11.12f, -12.13f, 13.14f),
{13.14f, -12.13f, -11.12f, 10.11f},
},
{
_mm_set_ps(10.1f, -20.2f, 30.3f, 40.4f),
{40.4f, 30.3f, -20.2f, 10.1f},
},
};

// Issue one prefetch per supported hint for every element of test_vec.
for (size_t i = 0; i < (sizeof(test_vec) / (sizeof(test_vec[0]))); i++) {
_mm_prefetch(((const char *) &test_vec[i].a), _MM_HINT_T0);
_mm_prefetch(((const char *) &test_vec[i].a), _MM_HINT_T1);
_mm_prefetch(((const char *) &test_vec[i].a), _MM_HINT_T2);
_mm_prefetch(((const char *) &test_vec[i].a), _MM_HINT_NTA);
}

// Reaching this point is the only success criterion.
return TEST_SUCCESS;
}

result_t test_m_psadbw(const SSE2NEONTestImpl &impl, uint32_t iter)
Expand Down

0 comments on commit a387dd5

Please sign in to comment.