26 typedef void (*
cell_iterator)(
double * cell_value,
double * comm_buffer);
40 static int64_t
reduce_int64(int64_t input, MPI_Comm comm);
43 static void verify_density_field(
PetaPM * pm,
double * real,
double * meshbuf,
const size_t meshsize);
52 pfft_complex * rho_k = (pfft_complex * )
mymalloc(
"PMrho_k", pm->
priv->
fftsize *
sizeof(
double));
53 memset(rho_k, 0, pm->
priv->
fftsize *
sizeof(
double));
60 #define POS(i) ((double*) (&((char*)CPS->Parts)[CPS->elsize * (i) + CPS->offset_pos]))
61 #define MASS(i) ((float*) (&((char*)CPS->Parts)[CPS->elsize * (i) + CPS->offset_mass]))
62 #define INACTIVE(i) (CPS->active && !CPS->active(i))
86 pfft_plan_with_nthreads(Nthreads);
104 ptrdiff_t n[3] = {Nmesh, Nmesh, Nmesh};
114 MPI_Comm_size(comm, &
NTask);
119 for(i = sqrt(
NTask) + 1; i >= 0; i --) {
120 if(
NTask % i == 0)
break;
125 message(0,
"Using 2D Task mesh %td x %td \n", np[0], np[1]);
127 endrun(0,
"Error: This test file only works with %td processes.\n", np[0]*np[1]);
130 int periods_unused[2];
133 if(pm->
NTask2d[0] != np[0]) abort();
134 if(pm->
NTask2d[1] != np[1]) abort();
149 #define ROLL(a, N, j) { \
150 typeof(a[0]) tmp[N]; \
152 for(k = 0; k < N; k ++) tmp[k] = a[k]; \
153 for(k = 0; k < N; k ++) a[k] = tmp[(k + j)% N]; \
168 pfft_complex * rho_k = (pfft_complex * )
mymalloc(
"PMrho_k", pm->
priv->
fftsize *
sizeof(
double));
169 pfft_complex * complx = (pfft_complex *)
mymalloc(
"PMcomplex", pm->
priv->
fftsize *
sizeof(
double));
173 PFFT_TRANSPOSED_OUT | PFFT_ESTIMATE | PFFT_TUNE | PFFT_DESTROY_INPUT);
176 PFFT_TRANSPOSED_IN | PFFT_ESTIMATE | PFFT_TUNE | PFFT_DESTROY_INPUT);
194 int * tmp = (
int *)
mymalloc(
"tmp",
sizeof(
int) * Nmesh);
195 for(k = 0; k < 2; k ++) {
196 for(i = 0; i < Nmesh; i ++) {
204 MPI_Allreduce(tmp, pm->
Mesh2Task[k], Nmesh, MPI_INT, MPI_MAX, comm);
260 PetaPMRegion * regions = prepare(pm, pstruct, userdata, Nregions);
284 memset(real, 0,
sizeof(
double) * pm->
priv->
fftsize);
293 pfft_complex * complx = (pfft_complex *)
mymalloc(
"PMcomplex", pm->
priv->
fftsize *
sizeof(
double));
297 pfft_complex * rho_k = (pfft_complex * )
mymalloc2(
"PMrho_k", pm->
priv->
fftsize *
sizeof(
double));
318 pfft_complex * rho_k,
329 pfft_complex * complx = (pfft_complex *)
mymalloc(
"PMcomplex", pm->
priv->
fftsize *
sizeof(
double));
408 for (r = 0; r < Nregions; r ++) {
409 NpAlloc += regions[r].
size[0] * regions[r].
size[1];
442 for(i = 1; i <
NTask; i ++) {
464 if(totNpExport != totNpImport) {
465 endrun(1,
"totNpExport = %ld\n", totNpExport);
467 if(totNcExport != totNcImport) {
468 endrun(1,
"totNcExport = %ld\n", totNcExport);
472 message(0,
"PetaPM: %010ld/%010ld Pencils and %010ld Cells\n", totNpExport, totNpAlloc, totNcExport);
488 for (r = 0; r < Nregions; r++) {
490 #pragma omp parallel for private(ix)
491 for(ix = 0; ix < regions[r].
size[0]; ix++) {
493 for(iy = 0; iy < regions[r].
size[1]; iy++) {
494 int poffset = ix * regions[r].
size[1] + iy;
502 regions[r].strides[0] * ix +
503 regions[r].strides[1] * iy;
517 p0 += regions[r].
size[0] * regions[r].
size[1];
530 for(i = 0; i <
NTask; i ++) {
533 if(L->
NpSend[i] == 0)
continue;
535 for(j = 1; j < L->
NpSend[i]; j++) {
548 for(i = 0; i <
NTask; i ++) {
551 for(j = 0; j < L->
NpRecv[i]; j++) {
559 for(i = 0; i <
NTask; i ++) {
562 for(j = 0; j < L->
NpSend[i]; j++) {
577 static void to_pfft(
double * cell,
double * buf) {
578 #pragma omp atomic update
600 sizeof(
double) * p->
len);
611 double massExport = 0;
616 double massImport = 0;
620 double totmassExport;
621 double totmassImport;
622 MPI_Allreduce(&massExport, &totmassExport, 1, MPI_DOUBLE, MPI_SUM, L->
comm);
623 MPI_Allreduce(&massImport, &totmassImport, 1, MPI_DOUBLE, MPI_SUM, L->
comm);
624 message(0,
"totmassExport = %g totmassImport = %g\n", totmassExport, totmassImport);
670 sizeof(
double) * p->
len);
688 #pragma omp parallel for
692 ptrdiff_t linear0 = 0;
693 for(k = 0; k < 2; k ++) {
695 while(ix < 0) ix += pm->
Nmesh;
705 for(j = 0; j < p->
len; j ++) {
706 int iz = p->
offset[2] + j;
707 while(iz < 0) iz += pm->
Nmesh;
728 for(i = 0 ; i < Nregions; i ++) {
732 if ( size == 0 )
return;
735 memset(pm->
priv->
meshbuf, 0, size *
sizeof(
double));
737 for(i = 0 ; i < Nregions; i ++) {
755 double * Pos =
POS(i);
762 if(RegionInd >= Nregions)
763 endrun(1,
"Particle %d has region %d out of bounds %d\n", i, RegionInd, Nregions);
766 for(k = 0; k < 3; k++) {
768 iCell[k] = floor(tmp);
769 Res[k] = tmp - iCell[k];
770 iCell[k] -= region->
offset[k];
772 if(iCell[k] >= region->
size[k] - 1 || iCell[k] < 0) {
773 endrun(1,
"particle out of cell better stop %d (k=%d) %g %g %g region: %td %td\n", iCell[k],k,
774 Pos[0], Pos[1], Pos[2],
780 for(connection = 0; connection < 8; connection++) {
783 for(k = 0; k < 3; k++) {
784 int offset = (connection >> k) & 1;
785 int tmp = iCell[k] +
offset;
786 linear += tmp * region->
strides[k];
792 endrun(1,
"particle linear index out of cell better stop\n");
794 iterator(pm, i, ®ion->
buffer[linear], weight);
806 #pragma omp parallel for
816 for(k = 2; k >= 0; k --) {
818 rt = region->
size[k] * rt;
828 for(k = 0; k < 2; k ++) {
830 while(ix < 0) ix += pm->
Nmesh;
841 if(p2->
len == 0)
return -1;
842 if(p1->
len == 0)
return 1;
845 return ((t2 < t1) - (t1 < t2)) * 2 +
850 static void verify_density_field(
PetaPM * pm,
double * real,
double * meshbuf,
const size_t meshsize) {
852 double mass_Part = 0;
854 #pragma omp parallel for reduction(+: mass_Part)
856 double Mass = *
MASS(j);
859 double totmass_Part = 0;
860 MPI_Allreduce(&mass_Part, &totmass_Part, 1, MPI_DOUBLE, MPI_SUM, pm->
comm);
862 double mass_Region = 0;
865 #pragma omp parallel for reduction(+: mass_Region)
866 for(i = 0; i < meshsize; i ++) {
867 mass_Region += meshbuf[i];
869 double totmass_Region = 0;
870 MPI_Allreduce(&mass_Region, &totmass_Region, 1, MPI_DOUBLE, MPI_SUM, pm->
comm);
872 #pragma omp parallel for reduction(+: mass_CIC)
876 double totmass_CIC = 0;
877 MPI_Allreduce(&mass_CIC, &totmass_CIC, 1, MPI_DOUBLE, MPI_SUM, pm->
comm);
879 message(0,
"total Region mass err = %g CIC mass err = %g Particle mass = %g\n", totmass_Region / totmass_Part - 1, totmass_CIC / totmass_Part - 1, totmass_Part);
891 #pragma omp parallel for
892 for(ip = 0; ip < region->
totalsize; ip ++) {
898 for(k = 0; k < 3; k ++) {
899 pos[k] = tmp / region->
strides[k];
900 tmp -= pos[k] * region->
strides[k];
902 pos[k] += region->
offset[k];
904 if(pos[k] >= pm->
Nmesh) {
905 endrun(1,
"position didn't make sense\n");
909 k2 += ((int64_t)kpos[k]) * kpos[k];
916 dst[ip][0] = src[ip][0];
917 dst[ip][1] = src[ip][1];
919 H(pm, k2, pos, &dst[ip]);
930 double Mass = *
MASS(i);
933 #pragma omp atomic update
934 mesh[0] += weight * Mass;
938 MPI_Allreduce(&input, &result, 1,
MPI_INT64, MPI_SUM, comm);
void message(int where, const char *fmt,...)
void endrun(int where, const char *fmt,...)
static PetaPMGlobalFunctions global_functions
static PetaPMFunctions functions[]
#define mymalloc(name, size)
#define mymalloc2(name, size)
#define report_memory_usage(x)
void petapm_module_init(int Nthreads)
PetaPMRegion * petapm_get_real_region(PetaPM *pm)
void petapm_init(PetaPM *pm, double BoxSize, double Asmth, int Nmesh, double G, MPI_Comm comm)
static void pm_init_regions(PetaPM *pm, PetaPMRegion *regions, const int Nregions)
static void layout_iterate_cells(PetaPM *pm, struct Layout *L, cell_iterator iter, double *real)
void petapm_destroy(PetaPM *pm)
static void layout_exchange_pencils(struct Layout *L)
int petapm_mesh_to_k(PetaPM *pm, int i)
void petapm_force_finish(PetaPM *pm)
static void pm_apply_transfer_function(PetaPM *pm, pfft_complex *src, pfft_complex *dst, petapm_transfer_func H)
static void pm_iterate(PetaPM *pm, pm_iterator iterator, PetaPMRegion *regions, const int Nregions)
static void layout_build_and_exchange_cells_to_pfft(PetaPM *pm, struct Layout *L, double *meshbuf, double *real)
static void pm_iterate_one(PetaPM *pm, int i, pm_iterator iterator, PetaPMRegion *regions, const int Nregions)
pfft_complex * petapm_force_r2c(PetaPM *pm, PetaPMGlobalFunctions *global_functions)
static int pencil_cmp_target(const void *v1, const void *v2)
static void to_region(double *cell, double *region)
static void to_pfft(double *cell, double *buf)
static void layout_prepare(PetaPM *pm, struct Layout *L, double *meshbuf, PetaPMRegion *regions, const int Nregions, MPI_Comm comm)
static int64_t reduce_int64(int64_t input, MPI_Comm comm)
static MPI_Datatype MPI_PENCIL
int * petapm_get_ntask2d(PetaPM *pm)
static void layout_build_and_exchange_cells_to_local(PetaPM *pm, struct Layout *L, double *meshbuf, double *real)
void petapm_region_init_strides(PetaPMRegion *region)
void(* pm_iterator)(PetaPM *pm, int i, double *mesh, double weight)
void petapm_force_c2r(PetaPM *pm, pfft_complex *rho_k, PetaPMRegion *regions, const int Nregions, PetaPMFunctions *functions)
pfft_complex * petapm_alloc_rhok(PetaPM *pm)
static void layout_finish(struct Layout *L)
PetaPMRegion * petapm_get_fourier_region(PetaPM *pm)
static void layout_build_pencils(PetaPM *pm, struct Layout *L, double *meshbuf, PetaPMRegion *regions, const int Nregions)
static PetaPMParticleStruct * CPS
void petapm_force(PetaPM *pm, petapm_prepare_func prepare, PetaPMGlobalFunctions *global_functions, PetaPMFunctions *functions, PetaPMParticleStruct *pstruct, void *userdata)
PetaPMRegion * petapm_force_init(PetaPM *pm, petapm_prepare_func prepare, PetaPMParticleStruct *pstruct, int *Nregions, void *userdata)
void(* cell_iterator)(double *cell_value, double *comm_buffer)
static void put_particle_to_mesh(PetaPM *pm, int i, double *mesh, double weight)
static int pos_get_target(PetaPM *pm, const int pos[2])
int * petapm_get_thistask2d(PetaPM *pm)
PetaPMRegion *(* petapm_prepare_func)(PetaPM *pm, PetaPMParticleStruct *pstruct, void *data, int *Nregions)
void(* petapm_transfer_func)(PetaPM *pm, int64_t k2, int kpos[3], pfft_complex *value)
void(* petapm_readout_func)(PetaPM *pm, int i, double *mesh, double weight)
struct Pencil * PencilSend
struct Pencil * PencilRecv
petapm_readout_func readout
petapm_transfer_func transfer
petapm_transfer_func global_transfer
void(* global_analysis)(PetaPM *pm)
petapm_transfer_func global_readout
PetaPMRegion real_space_region
PetaPMRegion fourier_space_region
#define MPIU_Barrier(comm)
#define walltime_measure(name)