Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
b9d87fc
Replace cpu-benchmark with similar stress-ng monte-carlo test
quantumsteve Feb 9, 2026
d3778c2
Add dependency psutil
quantumsteve Feb 10, 2026
827c189
terminate io process
quantumsteve Feb 10, 2026
e41861c
check for division by zero
quantumsteve Feb 10, 2026
901e87b
mute stress-ng
quantumsteve Feb 10, 2026
8c560de
Added a --quiet flag to another stress-ng invocation
henricasanova Feb 11, 2026
ffac645
Fixed the zombie problem
henricasanova Feb 11, 2026
e802679
Since memsize argument document says MB (and not MiB), I changed
henricasanova Feb 12, 2026
45124d9
typo-- !!
henricasanova Feb 12, 2026
c92654e
try to workaround missing cpu_queue
quantumsteve Feb 12, 2026
510ca58
typos
quantumsteve Feb 13, 2026
8bb70bc
Rewrite/Re-engineering of wfbench so that the execution proceeds in
henricasanova Feb 20, 2026
ebe24ab
Made it so that even if wfbench is ^C-ed, it doesn't leave runaway
henricasanova Feb 20, 2026
85079cb
Minor fix
henricasanova Feb 20, 2026
8f46f0e
check container output
quantumsteve Mar 2, 2026
61fe80c
bug-- in bin/wfbench
henricasanova Mar 18, 2026
4dee4fd
bug-- in wfbench
henricasanova Mar 18, 2026
2feb997
Merge branch 'stress-ng_cpu_benchmark' of github.com:wfcommons/WfComm…
henricasanova Mar 18, 2026
50e42f9
Updated the create_benchmark() method to allow specifying the number of
henricasanova Mar 18, 2026
4da3f66
Merge branch 'main' into stress-ng_cpu_benchmark
henricasanova Mar 19, 2026
54c212e
Insane race-condition bug fix in wfbench.py (having to deal with killing
henricasanova Mar 19, 2026
68fb5db
cleanup
quantumsteve Mar 20, 2026
8bd49b5
cleanup
quantumsteve Mar 20, 2026
8639ee7
commented out code
quantumsteve Mar 20, 2026
90a6a49
Updated wfbench to make it callable as a module
henricasanova Mar 21, 2026
9a77ef8
Made the Swift/T translator create a README file with instructions
henricasanova Mar 21, 2026
f627804
Modified swift-t translator fork-exec wfbench (which is known to be
henricasanova Mar 21, 2026
03b058a
Made Swift/T translator use python_exec()
henricasanova Mar 21, 2026
50bc158
test re-enabling
henricasanova Mar 21, 2026
22efdf1
Merge branch 'stress-ng_cpu_benchmark' into stress-ng_cpu_benchmark-w…
henricasanova Mar 21, 2026
32963ae
test updates
henricasanova Mar 21, 2026
f08f54e
Removed all traces of cpu-benchmark.cpp
henricasanova Mar 22, 2026
3aa7021
added a sleep to let redis server time to start in the swift/t container
henricasanova Mar 23, 2026
0a99665
small test fix/cleanup
henricasanova Mar 23, 2026
225b689
cleanup
quantumsteve Mar 23, 2026
a486c92
Update bin/wfbench
henricasanova Mar 27, 2026
6d58507
set type to integer
quantumsteve Apr 6, 2026
1325d1b
add hipified code
quantumsteve Apr 13, 2026
b633026
check CUDA/HIP return values
quantumsteve Apr 13, 2026
2b47fc3
missed file
quantumsteve Apr 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 0 additions & 40 deletions Makefile

This file was deleted.

89 changes: 0 additions & 89 deletions bin/cpu-benchmark.cpp

This file was deleted.

5 changes: 5 additions & 0 deletions bin/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
cmake_minimum_required(VERSION 3.21) # kept in sync with ../hip, where HIP language support requires 3.21
cmake_policy(VERSION 3.21.3...3.27)
project(MyProj LANGUAGES CUDA)
add_executable(gpu_benchmark gpu_benchmark.cu)

28 changes: 19 additions & 9 deletions bin/gpu_benchmark.cu → bin/cuda/gpu_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,16 @@
#include <cstdlib> // For std::atoi
#include "gpu_benchmark.h"

// The macro wraps any CUDA API call
#define CUDA_CHECK(ans) { gpuAssert((ans), __FILE__, __LINE__); }

inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
if (code != cudaSuccess) {
fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}

// Kernel function to perform a simple workload
__global__ void simpleKernel(int* data, int size) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
Expand All @@ -22,10 +32,10 @@ void runBenchmark(int max_work) {
}

// Allocate GPU memory
cudaMalloc(&d_data, max_work * sizeof(int));
CUDA_CHECK(cudaMalloc(&d_data, max_work * sizeof(int)));

// Copy data to GPU
cudaMemcpy(d_data, h_data, max_work * sizeof(int), cudaMemcpyHostToDevice);
CUDA_CHECK(cudaMemcpy(d_data, h_data, max_work * sizeof(int), cudaMemcpyHostToDevice));

// Kernel configuration
int threadsPerBlock = 256;
Expand All @@ -35,13 +45,13 @@ void runBenchmark(int max_work) {
simpleKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, max_work);

// Ensure the kernel has finished executing
cudaDeviceSynchronize();
CUDA_CHECK(cudaDeviceSynchronize());

// Copy results back to host (optional, just for validation)
cudaMemcpy(h_data, d_data, max_work * sizeof(int), cudaMemcpyDeviceToHost);
CUDA_CHECK(cudaMemcpy(h_data, d_data, max_work * sizeof(int), cudaMemcpyDeviceToHost));

// Cleanup
cudaFree(d_data);
CUDA_CHECK(cudaFree(d_data));
delete[] h_data;

std::cout << "Benchmark completed!" << std::endl;
Expand All @@ -58,10 +68,10 @@ void runBenchmarkTime(int max_work, int runtime_in_seconds) {
}

// Allocate GPU memory
cudaMalloc(&d_data, max_work * sizeof(int));
CUDA_CHECK(cudaMalloc(&d_data, max_work * sizeof(int)));

// Copy data to GPU
cudaMemcpy(d_data, h_data, max_work * sizeof(int), cudaMemcpyHostToDevice);
CUDA_CHECK(cudaMemcpy(d_data, h_data, max_work * sizeof(int), cudaMemcpyHostToDevice));

// Start the timer
auto start = std::chrono::high_resolution_clock::now();
Expand All @@ -77,10 +87,10 @@ void runBenchmarkTime(int max_work, int runtime_in_seconds) {
}

// Copy results back to host (optional, just for validation)
cudaMemcpy(h_data, d_data, max_work * sizeof(int), cudaMemcpyDeviceToHost);
CUDA_CHECK(cudaMemcpy(h_data, d_data, max_work * sizeof(int), cudaMemcpyDeviceToHost));

// Cleanup
cudaFree(d_data);
CUDA_CHECK(cudaFree(d_data));
delete[] h_data;

std::cout << "Benchmark completed!" << std::endl;
Expand Down
File renamed without changes.
5 changes: 5 additions & 0 deletions bin/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
cmake_minimum_required(VERSION 3.21) # HIP language support requires 3.21
cmake_policy(VERSION 3.21.3...3.27)
project(MyProj LANGUAGES HIP)
add_executable(gpu_benchmark gpu_benchmark.hip)

11 changes: 11 additions & 0 deletions bin/hip/gpu_benchmark.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef GPU_BENCHMARK_H
#define GPU_BENCHMARK_H

#include <hip/hip_runtime.h>

// Runs the benchmark kernel once over max_work elements (no time limit).
void runBenchmark(int max_work);
// Repeatedly launches the kernel over max_work elements until
// runtime_in_seconds of wall-clock time has elapsed.
void runBenchmarkTime(int max_work, int runtime_in_seconds);

#endif // GPU_BENCHMARK_H


135 changes: 135 additions & 0 deletions bin/hip/gpu_benchmark.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#include "hip/hip_runtime.h"
#include <iostream>
#include <chrono>
#include <cstdlib> // For std::atoi
#include "gpu_benchmark.h"

// Wraps any HIP API call and reports failures with file/line context.
//
// Fixes over the previous version:
//  - do { ... } while (0) makes the macro expand to a single statement,
//    so `if (c) HIP_CHECK(x); else ...` is well-formed (a bare `{...};`
//    block would break such call sites).
//  - `expression` is parenthesized at its use site.
//  - On failure we now exit with the error code, matching the CUDA
//    build's gpuAssert behavior; continuing after a failed hipMalloc /
//    hipMemcpy would operate on invalid device pointers.
#define HIP_CHECK(expression)                             \
    do {                                                  \
        const hipError_t status = (expression);           \
        if (status != hipSuccess) {                       \
            std::cerr << "HIP error "                     \
                      << status << ": "                   \
                      << hipGetErrorString(status)        \
                      << " at " << __FILE__ << ":"        \
                      << __LINE__ << std::endl;           \
            std::exit(status);                            \
        }                                                 \
    } while (0)

// Simple GPU workload: squares each element of `data` in place.
// Uses a grid-stride loop so the kernel is correct for ANY launch
// configuration (including grids smaller than `size`), not only the
// exact ceil(size/blockDim) grid the host code happens to compute.
// Results are identical to the original bounds-checked version.
__global__ void simpleKernel(int* data, int size) {
    int stride = blockDim.x * gridDim.x;
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += stride) {
        data[i] = data[i] * data[i]; // Simple workload: squaring each element
    }
}

// Function to run the GPU benchmark with no time limit
void runBenchmark(int max_work) {
int* h_data = new int[max_work];
int* d_data;

// Initialize data
for (int i = 0; i < max_work; i++) {
h_data[i] = i;
}

// Allocate GPU memory
HIP_CHECK(hipMalloc(&d_data, max_work * sizeof(int)));

// Copy data to GPU
HIP_CHECK(hipMemcpy(d_data, h_data, max_work * sizeof(int), hipMemcpyHostToDevice));

// Kernel configuration
int threadsPerBlock = 256;
int blocksPerGrid = (max_work + threadsPerBlock - 1) / threadsPerBlock;

// Run the kernel
simpleKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, max_work);

// Ensure the kernel has finished executing
HIP_CHECK(hipDeviceSynchronize());

// Copy results back to host (optional, just for validation)
HIP_CHECK(hipMemcpy(h_data, d_data, max_work * sizeof(int), hipMemcpyDeviceToHost));

// Cleanup
HIP_CHECK(hipFree(d_data));
delete[] h_data;

std::cout << "Benchmark completed!" << std::endl;
}

// Function to run the GPU benchmark for a specified time
void runBenchmarkTime(int max_work, int runtime_in_seconds) {
int* h_data = new int[max_work];
int* d_data;

// Initialize data
for (int i = 0; i < max_work; i++) {
h_data[i] = i;
}

// Allocate GPU memory
HIP_CHECK(hipMalloc(&d_data, max_work * sizeof(int)));

// Copy data to GPU
HIP_CHECK(hipMemcpy(d_data, h_data, max_work * sizeof(int), hipMemcpyHostToDevice));

// Start the timer
auto start = std::chrono::high_resolution_clock::now();

// Kernel configuration
int threadsPerBlock = 256;
int blocksPerGrid = (max_work + threadsPerBlock - 1) / threadsPerBlock;

// Run the workload loop until the specified runtime is reached
while (std::chrono::duration_cast<std::chrono::seconds>(std::chrono::high_resolution_clock::now() - start).count() < runtime_in_seconds) {
simpleKernel<<<blocksPerGrid, threadsPerBlock>>>(d_data, max_work);
HIP_CHECK(hipDeviceSynchronize()); // Ensure the kernel has finished executing
}

// Copy results back to host (optional, just for validation)
HIP_CHECK(hipMemcpy(h_data, d_data, max_work * sizeof(int), hipMemcpyDeviceToHost));

// Cleanup
HIP_CHECK(hipFree(d_data));
delete[] h_data;

std::cout << "Benchmark completed!" << std::endl;
}

// Entry point: parses <max_work> and an optional [runtime_in_seconds],
// validates both as positive integers, and dispatches to the fixed-work
// or time-limited benchmark variant. Returns 1 on any usage or
// validation error, 0 on success.
int main(int argc, char* argv[]) {
    // Anything other than one or two arguments is a usage error.
    if (argc != 2 && argc != 3) {
        std::cerr << "Usage: " << argv[0] << " <max_work> [runtime_in_seconds]" << std::endl;
        return 1;
    }

    // std::atoi yields 0 on non-numeric input, which the positivity
    // checks below reject.
    const int max_work = std::atoi(argv[1]);

    if (argc == 2) {
        if (max_work <= 0) {
            std::cerr << "max_work must be a positive integer." << std::endl;
            return 1;
        }
        runBenchmark(max_work);
    } else {
        const int runtime_in_seconds = std::atoi(argv[2]);
        if (max_work <= 0 || runtime_in_seconds <= 0) {
            std::cerr << "Both max_work and runtime_in_seconds must be positive integers." << std::endl;
            return 1;
        }
        runBenchmarkTime(max_work, runtime_in_seconds);
    }

    return 0;
}
Loading
Loading