Back to TILs

C++ reduce

Date: 2019-08-15Last modified: 2023-12-22

Table of contents

  vector<int> vec{ 0, 1, 2, 3, 4, 5 };

  auto sum0 = accumulate( begin( vec ), end( vec ), 0.0 );        // sequential
  auto sum1 = reduce( begin( vec ), end( vec ) );                 // sequential
  auto sum2 = reduce( execution::seq, begin( vec ), end( vec ) ); // sequential
  auto sum3 = reduce( execution::par, begin( vec ), end( vec ) ); // parallel
  cout << "Sum: " << sum0 << " " << sum1 << " " << sum2 << " " << sum3 << '\n';

Performance

The average time to operate over 10M items normalized by the accumulate time is:

Though std::reduce seems faster compared to std::accumulate, you should be careful when using it with non-commutative binary operations.

  const vector<double> v( 10'000'007, 0.5 );
  map<string, double>  means;

  cout << fixed << setprecision( 1 );
  const int N = 5;
  for( auto i = 0; i < N; ++i ) {
    {
      const auto                            t1     = chrono::high_resolution_clock::now();
      const double                          result = accumulate( v.cbegin(), v.cend(), 0.0 );
      const auto                            t2     = chrono::high_resolution_clock::now();
      const chrono::duration<double, milli> ms     = t2 - t1;
      cout << "std::accumulate result " << result << " took " << ms.count() << " ms\n";
      means["accumulate"] += ms.count() / N;
    }

    {
      const auto                            t1     = chrono::high_resolution_clock::now();
      const double                          result = reduce( execution::seq, v.cbegin(), v.cend() );
      const auto                            t2     = chrono::high_resolution_clock::now();
      const chrono::duration<double, milli> ms     = t2 - t1;
      cout << "std::reduce seq result " << result << " took " << ms.count() << " ms\n";
      means["reduce seq"] += ms.count() / N;
    }

    {
      const auto                            t1     = chrono::high_resolution_clock::now();
      const double                          result = reduce( execution::par, v.cbegin(), v.cend() );
      const auto                            t2     = chrono::high_resolution_clock::now();
      const chrono::duration<double, milli> ms     = t2 - t1;
      cout << "std::reduce par result " << result << " took " << ms.count() << " ms\n";
      means["reduce par"] += ms.count() / N;
    }
  }

  cout << "Average time to operate over 10M items:\n";
  for( const auto &[key, value] : means ) {
    cout << key << ": " << value << "ms\n";
  }

Possible output

Sum: 15 15 15 15
std::accumulate result 5000003.5 took 87.1 ms
std::reduce seq result 5000003.5 took 81.1 ms
std::reduce par result 5000003.5 took 16.3 ms
std::accumulate result 5000003.5 took 84.2 ms
std::reduce seq result 5000003.5 took 78.3 ms
std::reduce par result 5000003.5 took 16.2 ms
std::accumulate result 5000003.5 took 90.2 ms
std::reduce seq result 5000003.5 took 79.3 ms
std::reduce par result 5000003.5 took 17.1 ms
std::accumulate result 5000003.5 took 81.5 ms
std::reduce seq result 5000003.5 took 77.7 ms
std::reduce par result 5000003.5 took 15.1 ms
std::accumulate result 5000003.5 took 81.7 ms
std::reduce seq result 5000003.5 took 77.4 ms
std::reduce par result 5000003.5 took 14.1 ms
Average time to operate over 10M items:
accumulate: 84.9ms
reduce par: 15.7ms
reduce seq: 78.8ms