51 force_tree_build(
int npart,
int mask,
DomainDecomp * ddecomp,
const int HybridNuGrav,
const int DoMoments,
const char * EmergencyOutputDir);
70 static void force_validate_nextlist(
const ForceTree * tree)
77 endrun(5,
"Node %d (type %d) has sibling %d next %d father %d first %d final %d last %d ntop %d\n", no, current->
f.
ChildType, current->
sibling, current->
s.
suns[0], current->
father, tree->
firstnode, tree->
firstnode + tree->
numnodes, tree->
lastnode, tree->
NTopLeaves);
88 no = current->
s.
suns[0];
98 message(1,
"Danger! no %d has father %d, next %d sib %d, (ptype = %d) len %g center (%g %g %g) mass %g cofm %g %g %g TL %d DLM %d ITL %d nocc %d suns %d %d %d %d\n", no, current->
father, current->
s.
suns[0], current->
sibling, current->
f.
ChildType,
115 int child = ev->
child;
128 if(child < tree->nfather && child >= 0)
155 message(0,
"Tree constructed (moments: %d). First node %d, number of nodes %d, first pseudo %d. NTopLeaves %d\n",
164 message(0,
"Tree construction for types: %d.\n", mask);
175 message(0,
"Tree constructed (type mask: %d). First node %d, number of nodes %d, first pseudo %d. NTopLeaves %d\n",
195 int TooManyNodes = 0;
198 MPI_Reduce(&maxnodes, &maxmaxnodes, 1,
MPI_INT64, MPI_MAX,0, MPI_COMM_WORLD);
199 message(0,
"Treebuild: Largest is %g MByte for %ld tree nodes. firstnode %ld. (presently allocated %g MB)\n",
212 message(1,
"Not enough tree nodes (%ld) for %d particles. Created %d\n", maxnodes, npart, tree.
numnodes);
227 if(
MPIU_Any(TooManyNodes, MPI_COMM_WORLD)) {
229 if(EmergencyOutputDir) {
236 endrun(2,
"Required too many nodes, snapshot dumped\n");
241 force_validate_nextlist(&tree);
246 force_validate_nextlist(&tree);
255 force_validate_nextlist(&tree);
270 force_validate_nextlist(&tree);
284 return (
P[p_i].Pos[0] > node->
center[0]) +
285 ((
P[p_i].Pos[1] > node->
center[1]) << 1) +
286 ((
P[p_i].Pos[2] > node->
center[2]) << 2);
296 (fabs(2*(
P[p_i].Pos[0] - node->
center[0])) <= node->
len) *
297 (fabs(2*(
P[p_i].Pos[1] - node->
center[1])) <= node->
len) *
298 (fabs(2*(
P[p_i].Pos[2] - node->
center[2])) <= node->
len);
308 nfreep->
len = 0.5 * parent->
len;
317 for(j = 0; j < 3; j++) {
320 const int sign = (subnode & (1 << j)) ? 1 : -1;
324 nfreep->
s.
suns[j] = -1;
336 #define NODECACHE_SIZE (8*12)
363 tb.
Father[p_toplace] = parent;
366 tb.
Nodes[parent].
s.
Types +=
P[p_toplace].Type << (3*subnode);
381 int parent = firstparent;
391 int * oldsuns = nprnt->
s.
suns;
402 message(1,
"Failed placing %d at %g %g %g, type %d, ID %ld. Others were %d (%g %g %g, t %d ID %ld) and %d (%g %g %g, t %d ID %ld). next %d last %d\n",
403 p_toplace,
P[p_toplace].Pos[0],
P[p_toplace].Pos[1],
P[p_toplace].Pos[2],
P[p_toplace].Type,
P[p_toplace].ID,
404 oldsuns[0],
P[oldsuns[0]].Pos[0],
P[oldsuns[0]].Pos[1],
P[oldsuns[0]].Pos[2],
P[oldsuns[0]].Type,
P[oldsuns[0]].ID,
405 oldsuns[1],
P[oldsuns[1]].Pos[0],
P[oldsuns[1]].Pos[1],
P[oldsuns[1]].Pos[2],
P[oldsuns[1]].Type,
P[oldsuns[1]].ID
411 if(firstparent != parent)
421 newsuns[i] = newsuns[0] + i;
422 struct NODE *nfreep = &tb.
Nodes[newsuns[i]];
437 int child = newsuns[subnode];
448 int child = nprnt->
s.
suns[i];
455 memset(&nprnt->
mom, 0,
sizeof(nprnt->
mom));
459 int child = nprnt->
s.
suns[subnode];
507 endrun(1,
"Corruption in tree build: N[%d].[%d] = %d > lastnode (%d)\n",cur, subnode, child, tb.
lastnode);
520 else if(nocc < 1<<16) {
524 endrun(2,
"Tried to convert already converted node %d with nocc = %d\n", cur, nocc);
535 int this_left = left;
536 int this_right = right;
540 while(this_left != left_end && this_right != right_end)
543 endrun(10,
"Encountered invalid node: %d %d < first %d\n", this_left, this_right, tb.
firstnode);
544 struct NODE * nleft = &tb.
Nodes[this_left];
545 struct NODE * nright = &tb.
Nodes[this_right];
550 if((nleft->
f.
TopLevel && this_left != left) || (nright->
f.
TopLevel && this_right != right))
551 endrun(6,
"Encountered another topnode: left %d == right %d! type %d\n", this_left, this_right, nleft->
f.
ChildType);
552 if(this_left == this_right)
553 endrun(6,
"Odd: left %d == right %d! type %d\n", this_left, this_right, nleft->
f.
ChildType);
556 if(fabs(nleft->
len / nright->
len-1) > 1e-6)
557 endrun(6,
"Merge unsynced trees: %d %d len %g %g\n", this_left, this_right, nleft->
len, nright->
len);
562 endrun(7,
"Walking to nodes (%d %d) from (%d %d) fathers (%d %d)\n",
564 this_left = nleft->
s.
suns[0];
565 this_right = nright->
s.
suns[0];
573 endrun(8,
"Bad child %d of %d\n", i, nright->
s.
suns[i], this_right);
583 if(this_right > right) {
588 for(i = 0; i < 8; i++)
590 if(fat->
s.
suns[i] == this_right) {
615 endrun(8,
"Bad child %d of %d\n", i, nleft->
s.
suns[i], this_left);
620 memmove(&nleft->
s, &nright->
s,
sizeof(nleft->
s));
623 memset(&nleft->
mom, 0,
sizeof(nleft->
mom));
626 for(i = 0; i < 8; i++) {
627 int child = nleft->
s.
suns[i];
671 for(i = 0; i < 3; i++)
674 nfreep->
s.
suns[i] = -1;
696 MPI_Comm_rank(MPI_COMM_WORLD, &
ThisTask);
699 const int nthr = omp_get_max_threads();
700 int * topnodes =
ta_malloc(
"topnodes",
int, (EndLeaf - StartLeaf) * nthr);
702 for(j = 0; j < EndLeaf - StartLeaf; j++)
705 for(t = 1; t < nthr; t++) {
706 for(j = 0; j < EndLeaf - StartLeaf; j++) {
708 topnodes[j + t * (EndLeaf - StartLeaf)] = nnext;
709 memmove(&tb.
Nodes[nnext], &tb.
Nodes[topnodes[j]],
sizeof(
struct NODE));
715 const int first_free = nnext;
723 int tid = omp_get_thread_num();
724 const int *
const local_topnodes = topnodes + tid * (EndLeaf - StartLeaf);
737 int this_acc = local_topnodes[0];
751 #pragma omp for schedule(static, chnksz)
752 for(i = 0; i < npart; i++)
759 if(!((1<<
P[i].Type) & mask)) {
763 if(
P[i].IsGarbage || (
P[i].Swallowed &&
P[i].Type==5))
767 endrun(12,
"Zero mass particle %d type %d id %ld pos %g %g %g\n", i,
P[i].Type,
P[i].ID,
P[i].Pos[0],
P[i].Pos[1],
P[i].Pos[2]);
778 cur = local_topnodes[topleaf - StartLeaf];
791 #pragma omp for schedule(static, 1)
792 for(i = 0; i < EndLeaf - StartLeaf; i++) {
795 const int target = topnodes[i];
798 for(t = 1; t < nthr; t++) {
799 const int righttop = topnodes[i + t * (EndLeaf - StartLeaf)];
826 for(i = 0; i < 2; i++)
827 for(j = 0; j < 2; j++)
828 for(k = 0; k < 2; k++)
832 int count = i + 2 * j + 4 * k;
835 Nodes[no].
s.
suns[count] = *nextfree;
844 Nodes[*nextfree].
father = no;
855 if(*nextfree >= lastnode)
856 endrun(11,
"Not enough force nodes to topnode grid: need %d\n",lastnode);
860 int chld = Nodes[no].
s.
suns[j];
864 for(i = 0; i < 2; i++)
865 for(j = 0; j < 2; j++)
866 for(k = 0; k < 2; k++)
869 int count = i + 2 * j + 4 * k;
871 bits + 1, 2 * x + i, 2 * y + j, 2 * z + k, nextfree, lastnode);
886 const int firstpseudo = tree->
lastnode;
888 MPI_Comm_rank(MPI_COMM_WORLD, &
ThisTask);
895 endrun(5,
"In node %d, overwriting %d child particles (i = %d etc) with pseudo particle %d\n",
899 tree->
Nodes[index].
s.
suns[0] = firstpseudo + i;
919 pnode->
mom.
cofm[k] += (
P[i].Mass *
P[i].Pos[k]);
926 for(j = 0; j < 3; j++) {
939 for(jj = j + 1; jj < 8; jj++) {
954 endrun(3,
"force_update_particle_node called on node %d of wrong type!\n", no);
961 for(j = 0; j < 3; j++)
965 for(j = 0; j < 3; j++)
997 for(j=0; j < 8; j++, jj++) {
1002 while(jj < 8 && !tree->Nodes[suns[jj]].f.TopLevel &&
1016 for(j = 0; j < 8; j++)
1030 if(childcnt > 1 && level < 512) {
1031 #pragma omp task default(none) shared(level, childcnt, tree) firstprivate(nextsib, p)
1040 #pragma omp taskwait
1043 for(j = 0; j < 8; j++)
1045 const int p = suns[j];
1084 MPI_Comm_rank(MPI_COMM_WORLD, &
ThisTask);
1086 #pragma omp parallel
1087 #pragma omp single nowait
1094 #pragma omp task default(none) shared(tree) firstprivate(no)
1100 endrun(5,
"Error, found pseudo node %d but domain entry %d says on task %d\n", no, i,
ThisTask);
1112 int i, no, ta, recvTask;
1113 int *recvcounts, *recvoffset;
1114 struct topleaf_momentsdata
1122 MPI_Comm_size(MPI_COMM_WORLD, &
NTask);
1123 MPI_Comm_rank(MPI_COMM_WORLD, &
ThisTask);
1125 TopLeafMoments = (
struct topleaf_momentsdata *)
mymalloc(
"TopLeafMoments", ddecomp->
NTopLeaves *
sizeof(TopLeafMoments[0]));
1126 memset(&TopLeafMoments[0], 0,
sizeof(TopLeafMoments[0]) * ddecomp->
NTopLeaves);
1131 endrun(131231231,
"TopLeaf %d Task table is corrupted: task is %d\n", i, ddecomp->
TopLeaves[i].
Task);
1158 recvcounts = (
int *)
mymalloc(
"recvcounts",
sizeof(
int) *
NTask);
1159 recvoffset = (
int *)
mymalloc(
"recvoffset",
sizeof(
int) *
NTask);
1161 for(recvTask = 0; recvTask <
NTask; recvTask++)
1163 recvoffset[recvTask] = ddecomp->
Tasks[recvTask].
StartLeaf *
sizeof(TopLeafMoments[0]);
1167 MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
1168 &TopLeafMoments[0], recvcounts, recvoffset,
1169 MPI_BYTE, MPI_COMM_WORLD);
1175 for(ta = 0; ta <
NTask; ta++) {
1178 for(i = ddecomp->
Tasks[ta].
StartLeaf; i < ddecomp->Tasks[ta].EndLeaf; i ++) {
1214 for(j = 0; j < 8; j++)
1217 if(p < tree->firstnode || p >= tree->
lastnode)
1218 endrun(6767,
"Updating pseudos: %d -> %d which is not an internal node between %d and %d.",no, p, tree->
firstnode, tree->
lastnode);
1222 endrun(6767,
"Tried to update toplevel node %d with parent %d != expected %d\n", p, tree->
Nodes[p].
father, no);
1275 int *recvcounts, *recvoffset;
1284 MPI_Comm_size(MPI_COMM_WORLD, &
NTask);
1285 MPI_Comm_rank(MPI_COMM_WORLD, &
ThisTask);
1287 #pragma omp parallel for
1288 for(i = 0; i < size; i++)
1290 const int p_i = activeset ? activeset[i] : i;
1292 if(
P[p_i].Type != 0 ||
P[p_i].IsGarbage)
1295 int no = tree->
Father[p_i];
1301 MyFloat readhmax, newhmax = 0;
1303 for(j = 0; j < 3; j++) {
1313 #pragma omp atomic read
1316 if(newhmax <= readhmax) {
1321 }
while(!__atomic_compare_exchange(&(tree->
Nodes[no].
mom.
hmax), &readhmax, &newhmax, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
1329 double * TopLeafhmax = (
double *)
mymalloc(
"TopLeafMoments", ddecomp->
NTopLeaves *
sizeof(
double));
1330 memset(&TopLeafhmax[0], 0,
sizeof(
double) * ddecomp->
NTopLeaves);
1338 recvcounts = (
int *)
mymalloc(
"recvcounts",
sizeof(
int) *
NTask);
1339 recvoffset = (
int *)
mymalloc(
"recvoffset",
sizeof(
int) *
NTask);
1341 for(recvTask = 0; recvTask <
NTask; recvTask++)
1347 MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
1348 &TopLeafhmax[0], recvcounts, recvoffset,
1349 MPI_DOUBLE, MPI_COMM_WORLD);
1354 for(ta = 0; ta <
NTask; ta++) {
1357 for(i = ddecomp->
Tasks[ta].
StartLeaf; i < ddecomp->Tasks[ta].EndLeaf; i ++) {
1388 memset(tb.
Father, -1, maxpart *
sizeof(
int));
1396 if(maxpart + maxnodes >= 1L<<30)
1397 endrun(5,
"Size of tree overflowed for maxpart = %ld, maxnodes = %ld!\n", maxpart, maxnodes);
void dump_snapshot(const char *dump, const double Time, const Cosmology *CP, const char *OutputDir)
static int domain_get_topleaf(const peano_t key, const DomainDecomp *ddecomp)
void message(int where, const char *fmt,...)
void endrun(int where, const char *fmt,...)
int event_listen(EventSpec *eh, eventfunc func, void *userdata)
int event_unlisten(EventSpec *eh, eventfunc func, void *userdata)
static void init_internal_node(struct NODE *nfreep, struct NODE *parent, int subnode)
static ForceTree force_tree_build(int npart, int mask, DomainDecomp *ddecomp, const int HybridNuGrav, const int DoMoments, const char *EmergencyOutputDir)
static void force_treeupdate_pseudos(int no, const ForceTree *tree)
int merge_partial_force_trees(int left, int right, struct NodeCache *nc, int *nnext, const struct ForceTree tb, int HybridNuGrav)
static void force_create_node_for_topnode(int no, int topnode, struct NODE *Nodes, const DomainDecomp *ddecomp, int bits, int x, int y, int z, int *nextfree, const int lastnode)
void force_tree_rebuild(ForceTree *tree, DomainDecomp *ddecomp, const int HybridNuGrav, const int DoMoments, const char *EmergencyOutputDir)
int force_tree_create_nodes(const ForceTree tb, const int npart, int mask, DomainDecomp *ddecomp, const int HybridNuGrav)
static void add_particle_moment_to_node(struct NODE *pnode, int i)
int force_tree_allocated(const ForceTree *tree)
int get_freenode(int *nnext, struct NodeCache *nc)
ForceTree force_treeallocate(int64_t maxnodes, int64_t maxpart, DomainDecomp *ddecomp)
static int force_tree_eh_slots_fork(EIBase *event, void *userdata)
void force_update_node_parallel(const ForceTree *tree, const DomainDecomp *ddecomp)
int get_subnode(const struct NODE *node, const int p_i)
static int force_get_sibling(const int sib, const int j, const int *suns)
void force_update_hmax(int *activeset, int size, ForceTree *tree, DomainDecomp *ddecomp)
static void force_exchange_pseudodata(ForceTree *tree, const DomainDecomp *ddecomp)
void force_tree_rebuild_mask(ForceTree *tree, DomainDecomp *ddecomp, int mask, const int HybridNuGrav, const char *EmergencyOutputDir)
static int create_new_node_layer(int firstparent, int p_toplace, const int HybridNuGrav, const ForceTree tb, int *nnext, struct NodeCache *nc)
void init_forcetree_params(const int FastParticleType)
int add_particle_to_tree(int i, int cur_start, const ForceTree tb, const int HybridNuGrav, struct NodeCache *nc, int *nnext)
void force_tree_free(ForceTree *tree)
static void force_insert_pseudo_particles(const ForceTree *tree, const DomainDecomp *ddecomp)
static int inside_node(const struct NODE *node, const int p_i)
int force_get_father(int no, const ForceTree *tree)
static void force_update_particle_node(int no, const ForceTree *tree)
static int force_update_node_recursive(int no, int sib, int level, const ForceTree *tree)
static int modify_internal_node(int parent, int subnode, int p_toplace, const ForceTree tb, const int HybridNuGrav)
static struct forcetree_params ForceTreeParams
static int node_is_node(int no, const ForceTree *tree)
#define PARTICLE_NODE_TYPE
static int node_is_pseudo_particle(int no, const ForceTree *tree)
#define mymalloc(name, size)
#define ta_malloc(name, type, nele)
#define myrealloc(ptr, size)
#define mymalloc_usedbytes()
struct part_manager_type PartManager[1]
peano_t peano_hilbert_key(int x, int y, int z, int bits)
struct slots_manager_type SlotsManager[1]
struct topnode_data * TopNodes
struct topleaf_data * TopLeaves
int moments_computed_flag
struct topleaf_data * TopLeaves
unsigned int DependsOnLocalMass
unsigned int InternalTopLevel
int MPIU_Any(int condition, MPI_Comm comm)
static int atomic_fetch_and_add(int *ptr, int value)
#define MPIU_Barrier(comm)
#define walltime_measure(name)