/*
 * array2d.c
 *
 * Test program for 2D-array access order combined with OpenMP.
 * Fills c[i][j] = a[i] + b[j] with one of four loop arrangements and
 * reports the average elapsed time per repetition plus a checksum.
 *
 * Build: cl.exe array2d.c /O2 /openmp
 * Usage: array2d.exe [fn N L threads]
 *   fn      kernel to run, 1..4          (default 1)
 *   N       array dimension              (default 2000)
 *   L       number of repetitions        (default 1)
 *   threads number of OpenMP threads     (default 1)
 * The defaults are used unless all four arguments are supplied.
 */

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include <omp.h>

/* Common signature of the four kernels so they can be dispatched by index. */
typedef void kernel_fn(int n, const float *a, const float *b, float **c);

/*
 * (1) i outer, j inner; the outer i loop is parallelized.
 *
 * a and b must hold n elements; c must hold n row pointers, each row
 * with room for n floats. Every element c[i][j] is overwritten.
 */
static void fn1(int n, const float *a, const float *b, float **c)
{
    int i; /* declared outside the loop; it is the OpenMP loop variable */

#pragma omp parallel for
    for (i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            c[i][j] = a[i] + b[j];
        }
    }
}

/*
 * (2) i outer, j inner; the inner j loop is parallelized, so a parallel
 * region is entered once per outer iteration (n times in total).
 *
 * Array requirements are the same as for fn1.
 */
static void fn2(int n, const float *a, const float *b, float **c)
{
    for (int i = 0; i < n; i++) {
        int j;

#pragma omp parallel for
        for (j = 0; j < n; j++) {
            c[i][j] = a[i] + b[j];
        }
    }
}

/*
 * (3) j outer, i inner; the outer j loop is parallelized. The inner loop
 * walks down a column, i.e. it touches a different row on every step.
 *
 * Array requirements are the same as for fn1.
 */
static void fn3(int n, const float *a, const float *b, float **c)
{
    int j;

#pragma omp parallel for
    for (j = 0; j < n; j++) {
        for (int i = 0; i < n; i++) {
            c[i][j] = a[i] + b[j];
        }
    }
}

/*
 * (4) j outer, i inner; the inner i loop is parallelized, so a parallel
 * region is entered once per outer iteration (n times in total).
 *
 * Array requirements are the same as for fn1.
 */
static void fn4(int n, const float *a, const float *b, float **c)
{
    for (int j = 0; j < n; j++) {
        int i;

#pragma omp parallel for
        for (i = 0; i < n; i++) {
            c[i][j] = a[i] + b[j];
        }
    }
}

/*
 * Parse a decimal integer >= 1 that fits in an int.
 * Returns 0 and stores the value in *out on success, -1 on any error
 * (empty string, trailing characters, out of range, value < 1).
 */
static int parse_positive_int(const char *text, int *out)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE ||
        value < 1 || value > INT_MAX) {
        return -1;
    }
    *out = (int)value;
    return 0;
}

/* Release the first `count` rows of `rows` and the row table itself. */
static void free_matrix(float **rows, int count)
{
    if (rows == NULL) {
        return;
    }
    for (int i = 0; i < count; i++) {
        free(rows[i]);
    }
    free(rows);
}

/*
 * Allocate an n x n matrix as a table of n separately allocated rows.
 * The rows are deliberately not one contiguous slab: the row-pointer
 * layout is part of what this benchmark exercises.
 * Returns NULL (with nothing left allocated) on failure.
 */
static float **alloc_matrix(int n)
{
    float **rows = malloc((size_t)n * sizeof *rows);

    if (rows == NULL) {
        return NULL;
    }
    for (int i = 0; i < n; i++) {
        rows[i] = malloc((size_t)n * sizeof *rows[i]);
        if (rows[i] == NULL) {
            free_matrix(rows, i); /* only rows [0, i) were allocated */
            return NULL;
        }
    }
    return rows;
}

int main(int argc, char **argv)
{
    static kernel_fn *const kernels[] = { fn1, fn2, fn3, fn4 };
    const int num_kernels = (int)(sizeof kernels / sizeof kernels[0]);

    int fn = 1;
    int N = 2000;
    int L = 1;
    int threads = 1;

    /* arguments: all four must be present, otherwise keep the defaults */
    if (argc > 4) {
        if (parse_positive_int(argv[1], &fn) != 0 || fn > num_kernels ||
            parse_positive_int(argv[2], &N) != 0 ||
            parse_positive_int(argv[3], &L) != 0 ||
            parse_positive_int(argv[4], &threads) != 0) {
            fprintf(stderr,
                    "usage: %s [fn N L threads]  "
                    "(fn = 1..%d; N, L, threads >= 1)\n",
                    argv[0], num_kernels);
            return EXIT_FAILURE;
        }
    }

    /* OpenMP */
    omp_set_num_threads(threads);

    /* alloc */
    float *a = malloc((size_t)N * sizeof *a);
    float *b = malloc((size_t)N * sizeof *b);
    float **c = alloc_matrix(N);
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "%s: out of memory (N=%d)\n", argv[0], N);
        free_matrix(c, N);
        free(b);
        free(a);
        return EXIT_FAILURE;
    }

    /* setup */
    for (int i = 0; i < N; i++) {
        a[i] = (float)i;
        b[i] = (float)i;
    }

    /*
     * calculation
     *
     * Timed with omp_get_wtime() (wall-clock). clock() reports processor
     * time, which on many platforms accumulates over all threads and
     * would therefore hide any speed-up from parallel execution.
     */
    kernel_fn *const kernel = kernels[fn - 1];
    const double t0 = omp_get_wtime();
    for (int l = 0; l < L; l++) {
        kernel(N, a, b, c);
    }
    const double t1 = omp_get_wtime();

    /* check sum */
    double sum = 0;
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            sum += c[i][j];
        }
    }

    /* output: average seconds per repetition, computed sum, expected sum */
    const double elapsed = (t1 - t0) / L;
    /* sum over i,j in [0,N) of (i + j) = N * N * (N - 1) */
    const double exact = (double)N * N * (N - 1);
    printf("(%d) N=%d(%d), %.2f[sec], %e, %e\n", fn, N, L, elapsed, sum, exact);

    free_matrix(c, N);
    free(b);
    free(a);

    return 0;
}