/*
thread_sdot.c (thread)

Benchmark: dot product of two float vectors, with the index range split
across worker threads (Win32 threads on Windows, pthreads elsewhere).

VC++ : cl.exe /O2 thread_sdot.c
gcc  : gcc -O2 thread_sdot.c -o thread_sdot [-lpthread] [-lm]

Usage :
> thread_sdot [n nloop nthread]

Without arguments the defaults n=1000, nloop=1000, nthread=1 are used.
*/

#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#ifdef _WIN32
#include <windows.h>
HANDLE *hthread;
#else
#include <pthread.h>
#include <sys/time.h>
pthread_t *hthread;
#endif

/* Per-thread work description; `sum` is the only field a worker writes. */
typedef struct {
    int nthread;      /* total number of slices */
    int tid;          /* index of this slice, 0 .. nthread-1 */
    int n;            /* vector length */
    const float *a;   /* first input vector */
    const float *b;   /* second input vector */
    double sum;       /* output: partial dot product of this slice */
} thread_arg_t;

/*
 * Computes the partial dot product for slice `tid` of `nthread` and stores it
 * in targ->sum.
 *
 * arg : pointer to a thread_arg_t with nthread >= 1.
 */
static void sdot_thread(void *arg)
{
    thread_arg_t *targ = (thread_arg_t *)arg;
    const int nthread = targ->nthread;
    const int tid = targ->tid;
    const int n = targ->n;
    const float *a = targ->a;
    const float *b = targ->b;

    double sum = 0;

    if (n > 0) {
        // (1) block
        // Bounds are computed in long long so that n + nthread - 1 and
        // (tid + 1) * block cannot overflow int for large n.
        const long long block = ((long long)n + nthread - 1) / nthread;
        const long long i0 = tid * block;
        long long i1 = i0 + block;
        if (i1 > n) {
            i1 = n;
        }

        for (long long i = i0; i < i1; i++) {
            // The product has type float; only the accumulation is double.
            sum += a[i] * b[i];
        }

        /*
        // (2) cyclic (alternative distribution, replaces the block loop above)
        for (int i = tid; i < n; i += nthread) {
            sum += a[i] * b[i];
        }
        */
    }

    targ->sum = sum;
}

/*
 * Thread entry points with the exact signature each threading API requires.
 * Casting sdot_thread itself to the API's function-pointer type and calling
 * it through that type would be undefined behaviour.
 */
#ifdef _WIN32
static DWORD WINAPI sdot_thread_entry(LPVOID arg)
{
    sdot_thread(arg);
    return 0;
}
#else
static void *sdot_thread_entry(void *arg)
{
    sdot_thread(arg);
    return NULL;
}
#endif

/*
 * Dot product of a[0..n-1] and b[0..n-1] using up to `nthread` threads.
 *
 * Uses the global `hthread` array, which must hold at least `nthread`
 * entries when nthread > 1. Values of nthread below 1 are treated as 1.
 * If the bookkeeping memory cannot be allocated, or `hthread` is NULL, the
 * whole product is computed in the calling thread. If an individual thread
 * cannot be created, its slice is computed in the calling thread.
 *
 * Returns the sum of the per-slice partial sums, added in slice order.
 */
static double sdot(int n, const float a[], const float b[], int nthread)
{
    if (nthread < 1) {
        nthread = 1;
    }

    // thread arguments
    thread_arg_t *targ = NULL;
    if (nthread > 1 && hthread != NULL) {
        targ = malloc((size_t)nthread * sizeof *targ);
    }

    // single thread (also the fallback when the argument array is unavailable)
    if (targ == NULL) {
        thread_arg_t single = {
            .nthread = 1, .tid = 0, .n = n, .a = a, .b = b, .sum = 0
        };
        sdot_thread(&single);
        return single.sum;
    }

    for (int tid = 0; tid < nthread; tid++) {
        targ[tid].nthread = nthread;
        targ[tid].tid = tid;
        targ[tid].n = n;
        targ[tid].a = a;
        targ[tid].b = b;
        targ[tid].sum = 0;
    }

    // multi thread
    // Handles of successfully started threads are packed into
    // hthread[0 .. nstarted-1].
    int nstarted = 0;
#ifdef _WIN32
    for (int tid = 0; tid < nthread; tid++) {
        HANDLE h = CreateThread(NULL, 0, sdot_thread_entry, &targ[tid], 0, NULL);
        if (h != NULL) {
            hthread[nstarted++] = h;
        } else {
            sdot_thread(&targ[tid]);
        }
    }
    // WaitForMultipleObjects accepts at most MAXIMUM_WAIT_OBJECTS handles
    // per call, so wait in chunks.
    for (int first = 0; first < nstarted; first += MAXIMUM_WAIT_OBJECTS) {
        int count = nstarted - first;
        if (count > MAXIMUM_WAIT_OBJECTS) {
            count = MAXIMUM_WAIT_OBJECTS;
        }
        WaitForMultipleObjects((DWORD)count, &hthread[first], TRUE, INFINITE);
    }
    for (int k = 0; k < nstarted; k++) {
        CloseHandle(hthread[k]);
    }
#else
    for (int tid = 0; tid < nthread; tid++) {
        if (pthread_create(&hthread[nstarted], NULL, sdot_thread_entry,
                           &targ[tid]) == 0) {
            nstarted++;
        } else {
            sdot_thread(&targ[tid]);
        }
    }
    for (int k = 0; k < nstarted; k++) {
        pthread_join(hthread[k], NULL);
    }
#endif

    // sum
    double sum = 0;
    for (int tid = 0; tid < nthread; tid++) {
        sum += targ[tid].sum;
    }

    free(targ);
    return sum;
}

/*
 * Parses `text` as a decimal integer in [1, INT_MAX].
 * Returns 0 and stores the value in *out on success, -1 otherwise.
 */
static int parse_positive_int(const char *text, int *out)
{
    char *end = NULL;
    errno = 0;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE ||
        value < 1 || value > INT_MAX) {
        return -1;
    }
    *out = (int)value;
    return 0;
}

/*
 * Runs sdot() nloop times on a[i] = b[i] = i + 1, then prints the accumulated
 * result, the closed-form reference value, the relative error, the elapsed
 * time and the thread count.
 */
int main(int argc, char **argv)
{
    int nthread = 1;
    int n = 1000;
    int nloop = 1000;
#ifdef _WIN32
    clock_t t0, t1;
#else
    struct timeval t0, t1;
#endif

    // arguments
    if (argc >= 4) {
        if (parse_positive_int(argv[1], &n) != 0 ||
            parse_positive_int(argv[2], &nloop) != 0 ||
            parse_positive_int(argv[3], &nthread) != 0) {
            fprintf(stderr, "usage: %s n nloop nthread (positive integers)\n",
                    argv[0]);
            return EXIT_FAILURE;
        }
    }

    // thread
    hthread = malloc((size_t)nthread * sizeof *hthread);

    // alloc
    float *a = malloc((size_t)n * sizeof *a);
    float *b = malloc((size_t)n * sizeof *b);
    if (hthread == NULL || a == NULL || b == NULL) {
        fprintf(stderr, "out of memory\n");
        free(b);
        free(a);
        free(hthread);
        hthread = NULL;
        return EXIT_FAILURE;
    }

    // setup problem
    for (int i = 0; i < n; i++) {
        a[i] = i + 1.0f;
        b[i] = i + 1.0f;
    }

    // timer
#ifdef _WIN32
    t0 = clock();
#else
    gettimeofday(&t0, NULL);
#endif

    // calculation
    double sum = 0;
    for (int loop = 0; loop < nloop; loop++) {
        sum += sdot(n, a, b, nthread);
    }

    // timer
    double cpu = 0;
#ifdef _WIN32
    t1 = clock();
    cpu = (double)(t1 - t0) / CLOCKS_PER_SEC;
#else
    gettimeofday(&t1, NULL);
    cpu = (t1.tv_sec - t0.tv_sec) + 1e-6 * (t1.tv_usec - t0.tv_usec);
#endif

    // output
    // nloop * sum_{k=1..n} k^2, with every factor in double so that
    // (n + 1) * (2n + 1) cannot overflow int.
    double exact = (double)nloop * n * (n + 1.0) * (2.0 * n + 1.0) / 6.0;
    printf("N=%d L=%d %.6e(%.6e) err=%.1e %.3f[sec] %d\n",
        n, nloop, sum, exact, fabs((sum - exact) / exact), cpu, nthread);

    // free
    free(hthread);
    hthread = NULL;
    free(a);
    free(b);

    return 0;
}