UPDATE: This seems to be a bug in MSVC, but it also happens with ICC version 14 with the highest optimization level (/O3) turned on.
UPDATE 2: With optimizations turned off, ICC gives:
- std::array 159,000
- raw array 117,000
- vector 162,313
I am comparing the performance of std::array vs. a raw array vs. std::vector using the code below. I have tested with the MSVC 2012 compiler and the Intel compiler version 14, on 64-bit Windows 7, compiling for 64-bit. The CPU is a 3rd-generation Intel processor.
The results (elapsed time-stamp-counter ticks divided by 1,000,000, as printed by the code below) are consistently:
- std::array ~35,600
- raw array ~35,600
- vector ~40,000
When I checked the assembly, I saw that the compilers chose the XMM registers for the std::array and raw array versions, so presumably some sort of SIMD processing is taking place. For the std::vector version, however, the regular r8-r15 registers are used.
Assuming I am correct with the above, why aren't the XMM registers used for an std::vector?
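Here is the fully working test code (you will need to increase your default stack reserve size, because the raw array and std::array tests each place a 10,000,000-element array of unsigned int, roughly 40 MB, on the stack):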
#include <array>
#include <iostream>
#include <vector>

// __rdtscp is a compiler intrinsic, not part of the C++ standard library.
#if defined(_MSC_VER)
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
// Number of unsigned int elements held by each container under test.
const unsigned noElements = 10u * 1000u * 1000u;
// Number of timed fill-and-sum passes each benchmark performs.
const unsigned noIterations = 500u;
// Benchmarks filling and summing a std::vector<unsigned int>.
//
// Performs noIterations passes. Each pass stores i into element i for all
// noElements elements, then adds 3 * element to a running sum. The elapsed
// time-stamp-counter ticks (read with __rdtscp), divided by 1,000,000, are
// printed to std::cout. The vector is sized before the first timestamp is
// taken, so resize() itself is not part of the measurement.
void testVector(){
    // volatile: every accumulation is an observable access, so the summing
    // loop cannot be discarded as dead code.
    volatile unsigned long long sum = 0;
    // __rdtscp needs somewhere to write its second output; the values are unused.
    unsigned int auxStart;
    unsigned int auxFinish;
    std::vector<unsigned int> vec;
    vec.resize(noElements);
    const unsigned long long start = __rdtscp(&auxStart);
    // Indices are unsigned int, the same type as noElements/noIterations and
    // as the stored elements: no signed/unsigned comparison and no implicit
    // int -> unsigned conversion on the store. The pass counter has its own
    // name so the inner loops no longer shadow it.
    for(unsigned int pass=0; pass<noIterations; pass++){
        for(unsigned int i=0; i<noElements; i++){
            vec[i] = i;
        }
        for(unsigned int i=0; i<noElements; i++){
            sum += (3 * vec[i]);
        }
    }
    const unsigned long long finish = __rdtscp(&auxFinish);
    // std::endl flushes, so the result is visible before the next benchmark starts.
    std::cout << "std::vector:\t" << (finish - start)/1000000 << std::endl;
}
// Benchmarks filling and summing a built-in array of unsigned int.
//
// Performs noIterations passes. Each pass stores i into element i for all
// noElements elements, then adds 3 * element to a running sum. The elapsed
// time-stamp-counter ticks (read with __rdtscp), divided by 1,000,000, are
// printed to std::cout.
//
// NOTE: the array has automatic storage and noElements elements (about 40 MB
// with a 4-byte unsigned int), so the program must be built with a stack
// reserve large enough to hold it.
void testRawArray(){
    // volatile: every accumulation is an observable access, so the summing
    // loop cannot be discarded as dead code.
    volatile unsigned long long sum = 0;
    // __rdtscp needs somewhere to write its second output; the values are unused.
    unsigned int auxStart;
    unsigned int auxFinish;
    unsigned int myRawArray[noElements];
    const unsigned long long start = __rdtscp(&auxStart);
    // Indices are unsigned int, the same type as noElements/noIterations and
    // as the stored elements: no signed/unsigned comparison and no implicit
    // int -> unsigned conversion on the store. The pass counter has its own
    // name so the inner loops no longer shadow it.
    for(unsigned int pass=0; pass<noIterations; pass++){
        for(unsigned int i=0; i<noElements; i++){
            myRawArray[i] = i;
        }
        for(unsigned int i=0; i<noElements; i++){
            sum += (3 * myRawArray[i]);
        }
    }
    const unsigned long long finish = __rdtscp(&auxFinish);
    // std::endl flushes, so the result is visible before the next benchmark starts.
    std::cout << "raw array: \t" << (finish - start)/1000000 << std::endl;
}
void testStdArray(){
volatile unsigned long long sum = 0;
unsigned long long start = 0;
unsigned long long finish = 0;
unsigned int x;
unsigned int y;
std::array<unsigned int, noElements> myStdArray;
start = __rdtscp(&x);
for(int i=0; i<noIterations; i++){
for(int i=0; i<noElements; i++){
myStdArray[i] = i;
}
for(int i=0; i<noElements; i++){
sum += (3 * myStdArray[i]);
}
}
finish = __rdtscp(&y);
std::cout << "std::array: \t" << (finish - start)/1000000 << std::endl;
}
// Entry point: runs the three container benchmarks one after another, in the
// order std::array, raw array, std::vector. Each prints its own result line.
int main()
{
    testStdArray();
    testRawArray();
    testVector();
    return 0;
}