Fast memory copy (SSE4)
Visual Studio 2008 has supports “Enable Instruction Functions” options (see a project settings -> C/C++ -> Optimization). Note that this option can enlarge code.
Also memcpy function implementation has written with using sse2 (movdqa).
int CopyMemSSE4(int* piDst, int* piSrc, unsigned long SizeInBytes)
{
// Initialize pointers to start of the USWC memory
_asm
{
mov esi, piSrc
mov edx, piSrc
// Initialize pointer to end of the USWC memory
add edx, SizeInBytes
// Initialize pointer to start of the cacheable WB buffer
mov edi, piDst
// Start of Bulk Load loop
inner_start:
// Load data from USWC Memory using Streaming Load
MOVNTDQA xmm0, xmmword ptr [esi]
MOVNTDQA xmm1, xmmword ptr [esi+16]
MOVNTDQA xmm2, xmmword ptr [esi+32]
MOVNTDQA xmm3, xmmword ptr [esi+48]
// Copy data to buffer
MOVDQA xmmword ptr [edi], xmm0
MOVDQA xmmword ptr [edi+16], xmm1
MOVDQA xmmword ptr [edi+32], xmm2
MOVDQA xmmword ptr [edi+48], xmm3
// Increment pointers by cache line size and test for end of loop
add esi, 040h
add edi, 040h
cmp esi, edx
jne inner_start
}
// End of Bulk Load loop
return 0;
}
#define DATA_SIZE 0x01000000
int main(int argc, char* argv[])
{
int *piSrc = NULL;
int *piDst = NULL;
unsigned long dwDataSizeInBytes = sizeof(int) * DATA_SIZE;
piSrc = (int *)_aligned_malloc(dwDataSizeInBytes, dwDataSizeInBytes);
piDst = (int *)_aligned_malloc(dwDataSizeInBytes, dwDataSizeInBytes);
memset(piSrc, 255, dwDataSizeInBytes);
memset(piDst, 0, dwDataSizeInBytes);
CopyMemSSE4(piDst, piSrc, dwDataSizeInBytes);
_aligned_free(piSrc);
_aligned_free(piDst);
}
Additional links:
Get 97 short and extremely useful tips from some of the most experienced and respected practitioners in the industry, including Uncle Bob Martin, Scott Meyers, Dan North, Linda Rising, Udi Dahan, Neal Ford, and many more. They encourage you to stretch yourself by learning new languages, looking at problems in new ways, following specific practices, taking responsibility for your work, and becoming as good at the entire craft of programming as you possibly can.