### Slides

```Renderscript
OpenMP
CUDA
C++ AMP
PPL
TBB
MPI
OpenACC
OpenCL
Cilk Plus GCD
void quicksort(int *v, int start, int end) {
if (start < end) {
int pivot = partition(v, start, end);
quicksort(v, start, pivot - 1);
quicksort(v, pivot + 1, end);
}
}
void quicksort(int *v, int start, int end) {
if (start < end) {
Problem 1:
expensive
int pivot = partition(v, start, end);
quicksort(v, start, pivot - 1);
});
quicksort(v, pivot + 1, end);
});
t1.join();
t2.join();
}
}
Problem 3:
Exceptions??
Problem 2:
Fork-join not enforced
void quicksort(int *v, int start, int end) {
if (start < end) {
int pivot = partition(v, start, end);
parallel region
quicksort(v, start, pivot - 1);
quicksort(v, pivot + 1, end);
}
}
void quicksort(int *v, int start, int end) {
if (start < end) {
parallel region
int pivot = partition(v, start, end);
r.run([&] {
quicksort(v, start, pivot - 1);
});
r.run([&] {
quicksort(v, pivot + 1, end);
});
});
}
}
proc 1
proc 2
proc 3
proc 4
proc 1
proc 2
proc 3
proc 4
Old items
New items
proc 1
proc 2
proc 3
proc 4
Old items
New items
proc 1
proc 2
proc 3
proc 4
Old items
New items
proc 1
proc 2
proc 3
proc 4
Old items
New items
“Thief”
e();
Q1: What
e()
Q2: What
r.run(f);
g();
});
h();
g()
f()
Q3: What
h()
for(int i=0; i<n; ++i)
r.run(f);
});
std::sort(begin, end,
[](int a, int b) { return a < b; });
int comparisons = 0;
std::sort(begin, end,
[&](int a, int b) { comparisons++; return a < b; });
template<typename Iterator, typename Compare>
void chaos_sort( Iterator first, Iterator last, Compare comp ) {
auto n = last-first;
std::vector<char> c(n);
for(;;) {
bool flag = false;
for( size_t i=1; i<n; ++i ) {
c[i] = comp(first[i],first[i-1]);
flag |= c[i];
}
if( !flag ) break;
for( size_t i=1; i<n; ++i )
if( c[i] )
std::swap( first[i-1], first[i] );
}
}
extern const sequential_execution_policy seq;
extern const parallel_execution_policy par;
extern const parallel_vector_execution_policy par_vec;
class execution_policy
{
public:
// ...
const type_info& target_type() const;
template<class T> T *target();
template<class T> const T *target() const;
};
std::vector<int> vec = ...
// standard sequential sort
std::sort(vec.begin(), vec.end());
using namespace std::experimental::parallel;
// explicitly sequential sort
sort(seq, vec.begin(), vec.end());
// permitting parallel execution
sort(par, vec.begin(), vec.end());
// permitting vectorization as well
sort(par_vec, vec.begin(), vec.end());
size_t threshold = ...
execution_policy exec = seq;
if(vec.size() > threshold)
{
exec = par;
}
sort(exec, vec.begin(), vec.end());
try
{
r = std::inner_product(std::par, a.begin(), a.end(), b.begin(),
func1, func2, 0);
}
catch(const exception_list& list)
{
for(auto& exptr : list)
{
// process exception pointer exptr
}
}
int a[n] = ...;
int b[n] = ...;
for(int i=0; i<n; ++i)
{
a[i] = b[i] + c;
}
movdqu
movdqu
movdqu
xmm1, XMMWORD PTR _b\$[esp+eax+132]
xmm0, XMMWORD PTR _a\$[esp+eax+132]
xmm1, xmm2
xmm1, xmm0
XMMWORD PTR _a\$[esp+eax+132], xmm1
a[i:i+3] = b[i:i+3] + c;
void f(int* a, int*b)
{
for(int i=0; i<n; ++i)
{
a[i] = b[i] + c;
func();
Aliasing?
mov
call
}
}
Side effects?
Dependence?
Exceptions?
ecx, DWORD PTR _b\$[esp+esi+140]
ecx, edi
DWORD PTR _a\$[esp+esi+140], ecx
func
void f(float* a, float*b)
{
for(int i=0; i<n; ++i)
{
a[i] = b[i] + c;
func();
}
}
for(int i=0; i<n; i+=4)
{
a[i:i+3] = b[i:i+3] + c;
for(int j=0; j<4; ++j)
func();
}
for(int i=0; i<n; ++i)
{
lock.enter();
a[i] = b[i] + c;
lock.release();
}
?
for(int i=0; i<n; i+=4)
{
for(int j=0; j<4; ++j)
lock.enter();
a[i:i+3] = b[i:i+3] + c;
for(int j=0; j<4; ++j)
lock.release();
}
void f(float* a, float*b)
{
for(int i=0; i<n; ++i)
{
// OK:
a[i] = b[i] + c;
void f(float* a, float*b)
{
std::for_each(a, b, [&](float f)
{
// Oops, no ‘i’:
a[i] = b[i] + c;
func();
func();
}
}
});
}
void f(float* a, float*b)
{
integer_iterator begin {0};
integer_iterator end {b-a};
// almost, see N3976
std::for_each( std::par_vec, begin, end, [&](int i)
{
a[i] = b[i] + c;
func();
});
}
```