/*
ARM Version 1.1.0 (2023/11)
Antenna optimization
Random deformation
OpenMOM Version 4.0.0

> ./arm.exe [nthread nrepeat nloop seed1 seed2]
> ./arm.exe [nthread problem]          (problem = 1...9)
> mpiexec.exe -n <nprocess> ./arm.exe [nthread nrepeat nloop seed1 seed2]
*/

#include <stdint.h>

#define MAIN
#include "omm.h"
#undef MAIN

#include "omm_prototype.h"

#ifdef _OPENMP
#include <omp.h>
#endif

// one candidate wire segment of the grid
// NOTE(review): xy presumably selects the segment direction and (i, j) its grid position - confirm in initsegment/setgeometry
typedef struct {
	int xy;
	int i;
	int j;
	int on;   // 0/1 flag, flipped by the random deformation in main
} segment_t;

// helpers defined in other source files (parameter names follow the call sites in main)
extern void initsegment(int nx, int ny, int randtype, int64_t *seed, double ratio, int nseg, segment_t seg[]);
extern void setgeometry(int nx, int ny, double lx, double ly, double h, double r, int sdiv, int lgeometry, int nseg, segment_t seg[]);
extern double getvalue(int problem);
extern void outputomm(const char fn[], double r);
extern void outputofd(const char fn[], int nx, int ny, double lx, double ly, double h, int sdiv);
extern double urand(int randtype, int64_t *seed);

// MPI wrappers (parameter names follow the call sites in main)
extern void mpi_initialize(int argc, char **argv, int *comm_size, int *comm_rank);
extern void mpi_close(void);
extern double comm_cputime(void);
extern void comm_result(int comm_size, int comm_rank, double fmin, int nseg, segment_t segbest[]);

// functions local to this file
static void setfrequency(double freq0, double freq1, int freqdiv);
static void setmisc(void);
static void calc(int nthread, int simd);

// main : antenna optimization by random deformation
//
// command line (after the program name):
//   (none)                              : built-in defaults
//   nthread problem                     : preset problem No.1...9
//   nthread nrepeat nloop seed1 seed2   : explicit hyper parameters
//
// Each process handles the repeats irepeat = comm_rank, comm_rank + comm_size, ...
// In every repeat a random initial pattern is refined nloop times by flipping
// one random segment and keeping the flip only when getvalue() decreases.
// The best pattern is written to arm.omm / arm.ofd by rank 0.
int main(int argc, char **argv)
{
	const char program[] = "ARM Version 1.1.0";

	// problem (0/1...9)
	int problem = 0;

	// 0=NOSIMD, 1=SSE, 2=AVX
	int simd = 1;
	char strsimd[BUFSIZ];
	strcpy(strsimd, (simd == 0) ? "(NOSIMD)" : (simd == 1) ? "(SSE)" : "(AVX)");
#if defined(__NEC__) || defined(__FUJITSU) || defined(__CLANG_FUJITSU)
	simd = 0;
	strcpy(strsimd, "");
#endif

	// antenna model
	const double lx = 100e-3;
	const double ly = 100e-3;
	const int nx = 10;
	const int ny = 10;
	const double h = 10e-3;       // height
	const double r = 0.5e-3;      // wire radius
	const int sdiv = 2;           // =1/2/3... : segment division

	// frequency
	double freq0 = 3.0e9;
	double freq1 = 3.0e9;
	int freqdiv = 0;

	// hyper parameter (1)
	const double ratio = 0.5;     // 0-1
	const int randtype = 2;       // =1/2/3/4

	// hyper parameters (2)
	int nthread = 1;
	int nrepeat = 32;
	int nloop = 3000;
	int64_t seed1 = 1000;
	int64_t seed2 = 500;

	// setup MPI
	int comm_size = 1;
	int comm_rank = 0;
	mpi_initialize(argc, argv, &comm_size, &comm_rank);
	//printf("%d %d\n", comm_size, comm_rank); fflush(stdout);

	// arguments
	if (argc == 3) {
		// problem No.1 ... 9
		nthread = atoi(argv[1]);
		problem = atoi(argv[2]);
		assert((problem >= 1) && (problem <= 9));
		freq0 = ((problem == 5) || (problem == 6) || (problem == 7)) ? 2.5e9 : 3.0e9;
		freq1 = 3.0e9;
		freqdiv = ((problem == 5) || (problem == 6)) ? 1 : (problem == 7) ? 5 : 0;
	}
	else if (argc > 5) {
		nthread = atoi(argv[1]);
		nrepeat = atoi(argv[2]);
		nloop = atoi(argv[3]);
		// seeds are 64-bit : parse them with the 64-bit conversion
		seed1 = atoll(argv[4]);
		seed2 = atoll(argv[5]);
	}
	// nrepeat % comm_size = 0
	nrepeat = comm_size * MAX(nrepeat / comm_size, 1);

	// error check
	assert((nx % 2 == 0) && (ny % 2 == 0));
	assert((randtype >= 1) && (randtype <= 4));
	assert(nthread > 0);
	assert(nrepeat > 0);
	assert(nloop > 0);
	assert(seed1 > 0);
	assert(seed2 > 0);

	// set number of threads
#ifdef _OPENMP
	omp_set_num_threads(nthread);
#else
	nthread = 1;
#endif

	// cpu
	double cpu0 = comm_cputime();

	// set frequency
	setfrequency(freq0, freq1, freqdiv);

	// set misc.
	setmisc();

	// alloc (segment)
	// segbest is zero-filled : it is only overwritten when a repeat beats fminbest,
	// but it is always passed to comm_result/setgeometry afterwards
	const int nseg = (nx + 0) * (ny + 1) + (nx + 1) * (ny + 0);
	segment_t *seg     = (segment_t *)malloc(nseg * sizeof(segment_t));
	segment_t *segbest = (segment_t *)calloc(nseg, sizeof(segment_t));

	// alloc (geometry)
	const int lgeometry = nseg + 2;  // 2 : feed
	Geometry = (geometry_t *)malloc(lgeometry * sizeof(geometry_t));

	// the arrays are written immediately below : stop here if any allocation failed
	if ((seg == NULL) || (segbest == NULL) || (Geometry == NULL)) {
		fprintf(stderr, "*** memory allocation error (rank=%d)\n", comm_rank);
		abort();
	}

	// monitor
	if (!comm_rank) {
		printf("%s\n", program);
		printf("nprocess=%d nthread=%d %s\n", comm_size, nthread, strsimd);
		// int64_t has no portable plain conversion : print the seeds as long long
		printf("nrepeat=%d nloop=%d seed(%d)=%lld,%lld segment=%d*%d nfrequency=%d\n", nrepeat, nloop, randtype, (long long)seed1, (long long)seed2, nseg, sdiv, NFrequency);
		fflush(stdout);
	}

	// repeat
	double fminbest = 1e10;
	//for (int irepeat = 0; irepeat < nrepeat; irepeat++) {
	for (int irepeat = comm_rank; irepeat < nrepeat; irepeat += comm_size) {
		//printf("%d %d %d %d\n", comm_size, comm_rank, nrepeat, irepeat); fflush(stdout);

		// initialize random number (one independent seed per repeat)
		int64_t seed = seed1 + (irepeat * seed2);
		if (randtype == 1) {
			srand((unsigned int)seed);
			rand();
			rand();
		}

		// set initial geometry
		initsegment(nx, ny, randtype, &seed, ratio, nseg, seg);
		setgeometry(nx, ny, lx, ly, h, r, sdiv, lgeometry, nseg, seg);
		//printf("%d\n", NGeometry);

		// alloc
		LElement = lgeometry * sdiv;
		memalloc(1, nthread, simd);
		memalloc(2, nthread, simd);
		//printf("%d %d %d %d\n", comm_rank, LElement, NFeed, NFrequency); fflush(stdout);

		// initial calculation
		calc(nthread, simd);
		double fmin = getvalue(problem);
		//printf("%d 0 %f %d\n", irepeat, fmin, NGeometry);
		//outputomm("arm0.omm", r);

		// loop
		int smin = nseg;
		int nmin = 0;
		for (int loop = 0; loop < nloop; loop++) {
			// random deformation : flip one randomly chosen segment
			const double rnd = urand(randtype, &seed);
			const int irnd = (int)(rnd * nseg);
			//printf("%d %d %d\n", loop, nseg, irnd);
			assert((irnd >= 0) && (irnd < nseg));
			seg[irnd].on = !seg[irnd].on;
			setgeometry(nx, ny, lx, ly, h, r, sdiv, lgeometry, nseg, seg);

			// get value
			calc(nthread, simd);
			double f = getvalue(problem);
			//printf("%d %d %f\n", irepeat, loop + 1, f);

			// judge : keep the flip only if the value decreased, otherwise undo it
			if (f < fmin) {
				fmin = f;
				nmin = loop;
				smin = NGeometry;
				//printf("%d %d %f %d\n", irepeat, loop + 1, f, NGeometry);
			}
			else {
				seg[irnd].on = !seg[irnd].on;
			}
			//printf("%d %d %f %d\n", irepeat, loop + 1, fmin, NGeometry);
		}

		// judge ("*" marks a new best of this process)
		printf("%3d %3d%s %f (%d, %d)\n", comm_rank, irepeat + 1, (fmin < fminbest ? "*" : " "), fmin, nmin + 1, smin); fflush(stdout);
		if (fmin < fminbest) {
			fminbest = fmin;
			memcpy(segbest, seg, nseg * sizeof(segment_t));
		}

		// free
		memfree(2, nthread, simd);
		memfree(1, nthread, simd);
	}

	// communicate result
	if (comm_size > 1) {
		comm_result(comm_size, comm_rank, fminbest, nseg, segbest);
	}
	else {
		printf("fmin = %f\n", fminbest); fflush(stdout);
	}

	// output best omm
	if (!comm_rank) {
		setgeometry(nx, ny, lx, ly, h, r, sdiv, lgeometry, nseg, segbest);
		outputomm("arm.omm", r);
		outputofd("arm.ofd", nx, ny, lx, ly, h, sdiv);
		printf("%s\n", "output : arm.omm, arm.ofd");
	}

	// cpu time
	double cpu1 = comm_cputime();
	if (!comm_rank) {
		printf("cpu time = %.3f [sec]\n", cpu1 - cpu0);
	}

	// free (segment)
	free(seg);
	free(segbest);

	// close MPI
	mpi_close();

	return 0;
}


// set frequency
// Stores freqdiv + 1 in NFrequency and fills the newly allocated global table
// Frequency[0...freqdiv] with equally spaced values starting at freq0 with
// step (freq1 - freq0) / freqdiv.
// freqdiv = 0 gives the single frequency freq0 (freq1 is not used then).
static void setfrequency(double freq0, double freq1, int freqdiv)
{
	// a negative division count would give an empty or negative-sized table
	assert(freqdiv >= 0);

	NFrequency = freqdiv + 1;
	Frequency = (double *)malloc(NFrequency * sizeof(double));
	// the table is written right below : stop here if the allocation failed
	if (Frequency == NULL) {
		fprintf(stderr, "*** setfrequency : memory allocation error\n");
		abort();
	}

	// step is 0 for a single frequency (avoids division by zero)
	const double dfreq = (freqdiv > 0) ? (freq1 - freq0) / freqdiv : 0;
	for (int nfreq = 0; nfreq <= freqdiv; nfreq++) {
		Frequency[nfreq] = freq0 + (nfreq * dfreq);
	}
}


// set misc. data
// Assigns the fixed global settings used by this program : Z0 = 50, IGround = 1.
// NOTE(review): presumably Z0 is the feed impedance [ohm] and IGround = 1 enables the ground plane - confirm in omm.h
static void setmisc(void)
{
	Z0 = 50;

	IGround = 1;
}


// calculation
// Runs one analysis pass by calling wiregrid(), solve() and zfeed() in this order.
static void calc(int nthread, int simd)
{
	// every optional solver flag is passed as 0
	const int disabled = 0;
	// handed to solve(); not read in this function
	double cputime[3];

	// make wire grid model
	wiregrid();

	// solve (flag order : zmatrix, check, monitor)
	solve(nthread, simd, disabled, disabled, disabled, cputime);

	// input impedance
	zfeed();
}
