barrier

1. Synchronize Threads in a Critical Section

// Mutex to protect shared variable
std::mutex mtx;
// Rendezvous point for the two threads that run thread_function()
std::barrier barrier(2);

// Blocks until both participating threads have arrived, then enters the
// critical section guarded by mtx (one thread at a time).
void thread_function() {
  // Rendezvous BEFORE taking the lock. Waiting on the barrier while holding
  // mtx deadlocks: the first thread blocks in the barrier with the lock held,
  // so the second thread can never acquire mtx to reach the barrier.
  // (std::barrier has no wait() member; arrive_and_wait() is the blocking call.)
  barrier.arrive_and_wait();

  // RAII lock: released automatically on every exit path, including exceptions
  std::lock_guard<std::mutex> lock(mtx);

  // Access shared variable
  //...
}

2. Coordinate Parallel Tasks

// Create vector of tasks to be executed in parallel
std::vector<std::function<void()>> tasks;

// Initialize barrier to wait for all tasks to complete
std::barrier barrier(tasks.size());

void execute_tasks() {
  // Execute each task in parallel
  #pragma omp parallel for
  for (int i = 0; i < tasks.size(); i++) {
    tasks[i]();
    barrier.wait();
  }
}

3. Implement Thread Pool

// Class representing a thread pool with a barrier
class ThreadPool {
  std::vector<std::thread> threads;
  std::queue<std::function<void()>> tasks;
  std::barrier barrier;

public:
  ThreadPool(int num_threads) {
    for (int i = 0; i < num_threads; i++) {
      threads.push_back(std::thread(&ThreadPool::worker, this));
    }
  }

  void worker() {
    while (true) {
      std::function<void()> task;
      {
        std::lock_guard<std::mutex> lock(task_mutex);
        if (tasks.empty()) {
          continue;
        }
        task = tasks.front();
        tasks.pop();
      }
      task();
      barrier.wait();
    }
  }

  void add_task(std::function<void()> task) {
    {
      std::lock_guard<std::mutex> lock(task_mutex);
      tasks.push(task);
    }
    barrier.wait();
  }

  void stop() {
    // Signal threads to stop
    {
      std::lock_guard<std::mutex> lock(task_mutex);
      tasks.push([]() {});
    }
    barrier.wait();

    // Join threads
    for (auto& thread : threads) {
      thread.join();
    }
  }
};

4. Synchronize Data Processing Across Threads

// Shared data structure to be processed by multiple threads
std::vector<int> data;

// Initialize barrier to wait for all threads to finish processing
std::barrier barrier(num_threads);

// Doubles this thread's contiguous slice of `data`, then blocks until every
// participating thread has finished its own slice.
// Assumes thread_id (0-based) and num_threads are integers declared
// elsewhere -- TODO confirm.
void thread_function() {
  // Keep the bounds in the type of the size expression instead of narrowing
  // them to int.
  const auto first = thread_id * data.size() / num_threads;
  const auto last = (thread_id + 1) * data.size() / num_threads;
  for (auto i = first; i < last; ++i) {
    data[i] *= 2;
  }

  // std::barrier has no wait(); arrive_and_wait() blocks until all
  // participants of the current phase have arrived.
  barrier.arrive_and_wait();
}

5. Implement Producer-Consumer Pattern

// Queue to be used for communication between producer and consumer threads
std::queue<int> queue;

// Initialize barrier to wait for producer to fill queue before consumer starts
std::barrier barrier(2);

// Pushes num_items consecutive integers, then releases the consumer.
void producer_thread() {
  // Fill queue with items
  for (int i = 0; i < num_items; i++) {
    queue.push(i);
  }

  // Signal consumer to start processing. std::barrier has no wait();
  // arrive_and_wait() is the blocking rendezvous, and completing the phase
  // makes the pushes above visible to the consumer.
  barrier.arrive_and_wait();
}

// Waits until the producer has filled the queue, then drains it.
void consumer_thread() {
  // Wait for producer to fill queue
  barrier.arrive_and_wait();

  // Process items from queue. No lock is taken: the producer has stopped
  // touching the queue by the time the barrier releases this thread.
  while (!queue.empty()) {
    int item = queue.front();  // placeholder: the item is not used further
    queue.pop();
  }
}

6. Implement Reductions in Parallel

// Vector of values to be reduced
std::vector<int> values;

// Initialize barrier to wait for all threads to finish reduction
std::barrier barrier(num_threads);

// Serializes updates to global_sum
std::mutex global_sum_mutex;

// Thread function to perform reduction: sums this thread's slice of `values`,
// adds it to global_sum, then waits for all other threads to do the same.
// Assumes thread_id, num_threads and global_sum are declared elsewhere --
// TODO confirm.
void thread_function() {
  // Perform local reduction
  int local_sum = 0;
  const auto first = thread_id * values.size() / num_threads;
  const auto last = (thread_id + 1) * values.size() / num_threads;
  for (auto i = first; i < last; ++i) {
    local_sum += values[i];
  }

  // Global reduction. A mutex is used instead of `#pragma omp critical`: the
  // pragma is silently ignored when OpenMP is not enabled, which would leave
  // this update unprotected.
  {
    std::lock_guard<std::mutex> lock(global_sum_mutex);
    global_sum += local_sum;
  }

  // Wait for all threads to finish. Placed after the accumulation so that
  // global_sum holds the complete total once any thread passes this point.
  barrier.arrive_and_wait();
}

7. Implement Matrix Multiplication in Parallel

// Matrices to be multiplied
std::vector<std::vector<int>> A, B;

// Result matrix
std::vector<std::vector<int>> C;

// Initialize barrier to wait for all threads to finish multiplication
std::barrier barrier(num_threads);

// Computes this thread's band of rows of C = A * B, then waits for the other
// threads. C must already have its final dimensions; B is assumed to have
// one row per column of A -- TODO confirm against the caller.
void thread_function() {
  // Perform multiplication in parallel
  const auto first_row = thread_id * C.size() / num_threads;
  const auto last_row = (thread_id + 1) * C.size() / num_threads;
  for (auto i = first_row; i < last_row; ++i) {
    // Row-local sizes avoid touching C[0] / A[0] for rows this thread
    // does not own.
    for (std::size_t j = 0; j < C[i].size(); ++j) {
      int sum = 0;  // accumulate locally, store once
      for (std::size_t k = 0; k < A[i].size(); ++k) {
        sum += A[i][k] * B[k][j];
      }
      C[i][j] = sum;
    }
  }

  // Wait for all threads to finish (std::barrier has no wait() member)
  barrier.arrive_and_wait();
}

8. Implement Fast Fourier Transform (FFT) in Parallel

// Vector of complex numbers to be transformed
std::vector<std::complex<double>> X;

// Initialize barrier to wait for all threads to finish FFT
std::barrier barrier(num_threads);

void thread_function() {
  // Perform FFT in parallel
  std::transform(X.begin(), X.end(), X.begin(), fft);

  // Wait for all threads to finish
  barrier.wait();
}

9. Implement Quicksort in Parallel

// Vector of integers to be sorted
std::vector<int> arr;

// Initialize barrier to wait for all threads to finish sorting
std::barrier barrier(num_threads);

// Sorts this thread's chunk of `arr`; once every chunk is sorted, thread 0
// merges them into one sorted sequence. All threads return only after the
// merge is complete.
// Assumes thread_id (0-based) and num_threads are declared elsewhere --
// TODO confirm.
void thread_function() {
  // Sort a subset of the array
  std::sort(arr.begin() + thread_id * arr.size() / num_threads,
            arr.begin() + (thread_id + 1) * arr.size() / num_threads);

  // Phase 1: wait until every chunk is sorted
  barrier.arrive_and_wait();

  // Merge sorted subsets. Only one thread merges: neighbouring merges overlap
  // and would race if every thread ran its own. std::inplace_merge is used
  // because std::merge needs a separate output range.
  if (thread_id == 0) {
    for (int chunk = 1; chunk < num_threads; ++chunk) {
      // [begin, start of chunk) is already merged; fold the next chunk in.
      std::inplace_merge(arr.begin(),
                         arr.begin() + chunk * arr.size() / num_threads,
                         arr.begin() + (chunk + 1) * arr.size() / num_threads);
    }
  }

  // Phase 2: the barrier is reusable; nobody leaves until the merge is done
  barrier.arrive_and_wait();
}

10. Implement Matrix Inversion in Parallel

// Matrix to be inverted
std::vector<std::vector<double>> A;

// Inverse matrix
std::vector<std::vector<double>> A_inv;

// Initialize barrier to wait for all threads to finish inversion
std::barrier barrier(num_threads);

// Processes this thread's band of rows, then waits for the other threads.
//
// NOTE(review): the arithmetic below is kept exactly as in the original. It
// overwrites each A_inv[i][j] with the dot product of row i of A and column j
// of A_inv, after first zeroing that element. This is not a matrix inversion
// algorithm, and it reads rows of A_inv that other threads may be writing at
// the same time. The intended algorithm needs to be confirmed before this is
// used.
void thread_function() {
  const auto first_row = thread_id * A.size() / num_threads;
  const auto last_row = (thread_id + 1) * A.size() / num_threads;
  for (auto i = first_row; i < last_row; ++i) {
    for (std::size_t j = 0; j < A[0].size(); ++j) {
      A_inv[i][j] = 0;
      for (std::size_t k = 0; k < A[0].size(); ++k) {
        A_inv[i][j] += A[i][k] * A_inv[k][j];
      }
    }
  }

  // Wait for all threads to finish (std::barrier has no wait() member)
  barrier.arrive_and_wait();
}

11. Implement Image Processing in Parallel

// Image represented as a 2D array of pixels
std::vector<std::vector<int>> image;

// Initialize barrier to wait for all threads to finish processing
std::barrier barrier(num_threads);

// Applies process_pixel to every pixel in this thread's band of rows, then
// waits until all threads have finished their bands.
// Assumes thread_id, num_threads and process_pixel are declared elsewhere --
// TODO confirm.
void thread_function() {
  // Process a subset of the image in parallel
  const auto first_row = thread_id * image.size() / num_threads;
  const auto last_row = (thread_id + 1) * image.size() / num_threads;
  for (auto row = first_row; row < last_row; ++row) {
    // Iterate the row itself so its own length is used, not image[0]'s
    for (auto& pixel : image[row]) {
      pixel = process_pixel(pixel);
    }
  }

  // Wait for all threads to finish (std::barrier has no wait() member)
  barrier.arrive_and_wait();
}

12. Implement Monte Carlo Simulation in Parallel

// Number of Monte Carlo simulations to run
int num_simulations;

// Initialize barrier to wait for all threads to finish running simulations
std::barrier barrier(num_threads);

// Runs this thread's share of the simulations, then waits until every thread
// has finished its share.
// Assumes thread_id, num_threads and run_simulation are declared elsewhere --
// TODO confirm.
void thread_function() {
  // Run a subset of the Monte Carlo simulations
  const auto first = thread_id * num_simulations / num_threads;
  const auto last = (thread_id + 1) * num_simulations / num_threads;
  for (auto i = first; i < last; ++i) {
    // Run one Monte Carlo simulation.
    // NOTE(review): the result is not stored or aggregated anywhere.
    double result = run_simulation();
  }

  // Wait for all threads to finish (std::barrier has no wait() member)
  barrier.arrive_and_wait();
}

13. Implement K-Means Clustering in Parallel

// Dataset to be clustered
std::vector<std::vector<double>> data;

// Number of clusters
int num_clusters;

// Initialize barrier to wait for all threads to finish clustering
std::barrier barrier(num_threads);

// Runs assign_to_cluster on every point in this thread's slice of `data`,
// then waits until all threads have finished their slices.
// Assumes thread_id, num_threads and assign_to_cluster are declared
// elsewhere -- TODO confirm.
void thread_function() {
  // Cluster a subset of the data in parallel
  const auto first = thread_id * data.size() / num_threads;
  const auto last = (thread_id + 1) * data.size() / num_threads;
  for (auto i = first; i < last; ++i) {
    // Assign data point to closest cluster.
    // NOTE(review): the returned id is not recorded anywhere.
    int cluster_id = assign_to_cluster(data[i]);
  }

  // Wait for all threads to finish (std::barrier has no wait() member)
  barrier.arrive_and_wait();
}

14. Implement Dynamic Programming in Parallel

// Problem to be solved using dynamic programming
std::vector<int> dp;

// Initialize barrier to wait for all threads to finish computing DP solution

Previous: atomic | Next: bit