Back to TILs

C++ reduce

Table of contents

  vector<int> vec{ 0, 1, 2, 3, 4, 5 };

  auto sum0 = accumulate( begin( vec ), end( vec ), 0.0 );        // sequential
  auto sum1 = reduce( begin( vec ), end( vec ) );                 // sequential
  auto sum2 = reduce( execution::seq, begin( vec ), end( vec ) ); // sequential
  auto sum3 = reduce( execution::par, begin( vec ), end( vec ) ); // parallel
  cout << "Sum: " << sum0 << " " << sum1 << " " << sum2 << " " << sum3 << '\n';

Performance

The average time to operate over 10M items normalized by the accumulate time is:

Though std::reduce seems faster compared to std::accumulate, you should be careful when using it with non-commutative binary operations.

  const vector<double> v( 10'000'007, 0.5 );
  map<string, double>  means;

  cout << fixed << setprecision( 1 );
  const int N = 5;
  for( auto i = 0; i < N; ++i ) {
    {
      const auto                            t1     = chrono::high_resolution_clock::now();
      const double                          result = accumulate( v.cbegin(), v.cend(), 0.0 );
      const auto                            t2     = chrono::high_resolution_clock::now();
      const chrono::duration<double, milli> ms     = t2 - t1;
      cout << "std::accumulate result " << result << " took " << ms.count() << " ms\n";
      means["accumulate"] += ms.count() / N;
    }

    {
      const auto                            t1     = chrono::high_resolution_clock::now();
      const double                          result = reduce( execution::seq, v.cbegin(), v.cend() );
      const auto                            t2     = chrono::high_resolution_clock::now();
      const chrono::duration<double, milli> ms     = t2 - t1;
      cout << "std::reduce seq result " << result << " took " << ms.count() << " ms\n";
      means["reduce seq"] += ms.count() / N;
    }

    {
      const auto                            t1     = chrono::high_resolution_clock::now();
      const double                          result = reduce( execution::par, v.cbegin(), v.cend() );
      const auto                            t2     = chrono::high_resolution_clock::now();
      const chrono::duration<double, milli> ms     = t2 - t1;
      cout << "std::reduce par result " << result << " took " << ms.count() << " ms\n";
      means["reduce par"] += ms.count() / N;
    }
  }

  cout << "Average time to operate over 10M items:\n";
  for( const auto &[key, value] : means ) {
    cout << key << ": " << value << "ms\n";
  }

Possible output

Sum: 15 15 15 15
std::accumulate result 5000003.5 took 152.1 ms
std::reduce seq result 5000003.5 took 239.2 ms
std::reduce par result 5000003.5 took 48.2 ms
std::accumulate result 5000003.5 took 169.3 ms
std::reduce seq result 5000003.5 took 216.5 ms
std::reduce par result 5000003.5 took 45.4 ms
std::accumulate result 5000003.5 took 162.4 ms
std::reduce seq result 5000003.5 took 222.9 ms
std::reduce par result 5000003.5 took 39.3 ms
std::accumulate result 5000003.5 took 201.1 ms
std::reduce seq result 5000003.5 took 247.4 ms
std::reduce par result 5000003.5 took 39.4 ms
std::accumulate result 5000003.5 took 90.0 ms
std::reduce seq result 5000003.5 took 129.2 ms
std::reduce par result 5000003.5 took 26.2 ms
Average time to operate over 10M items:
accumulate: 155.0ms
reduce par: 39.7ms
reduce seq: 211.0ms