MP-Gadget  5.0.1.dev1-76bc7d4726-dirty
Classes | Macros | Functions | Variables
forcetree.c File Reference

gravitational tree More...

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <omp.h>
#include "slotsmanager.h"
#include "partmanager.h"
#include "domain.h"
#include "forcetree.h"
#include "checkpoint.h"
#include "walltime.h"
#include "utils/endrun.h"
Include dependency graph for forcetree.c:

Go to the source code of this file.

Classes

struct  forcetree_params
 
struct  NodeCache
 

Macros

#define NODECACHE_SIZE   (8*12)
 

Functions

void init_forcetree_params (const int FastParticleType)
 
static ForceTree force_tree_build (int npart, int mask, DomainDecomp *ddecomp, const int HybridNuGrav, const int DoMoments, const char *EmergencyOutputDir)
 
static void force_treeupdate_pseudos (int no, const ForceTree *tree)
 
static void force_create_node_for_topnode (int no, int topnode, struct NODE *Nodes, const DomainDecomp *ddecomp, int bits, int x, int y, int z, int *nextfree, const int lastnode)
 
static void force_exchange_pseudodata (ForceTree *tree, const DomainDecomp *ddecomp)
 
static void force_insert_pseudo_particles (const ForceTree *tree, const DomainDecomp *ddecomp)
 
static void add_particle_moment_to_node (struct NODE *pnode, int i)
 
static int force_tree_eh_slots_fork (EIBase *event, void *userdata)
 
int force_tree_allocated (const ForceTree *tree)
 
void force_tree_rebuild (ForceTree *tree, DomainDecomp *ddecomp, const int HybridNuGrav, const int DoMoments, const char *EmergencyOutputDir)
 
void force_tree_rebuild_mask (ForceTree *tree, DomainDecomp *ddecomp, int mask, const int HybridNuGrav, const char *EmergencyOutputDir)
 
int get_subnode (const struct NODE *node, const int p_i)
 
static int inside_node (const struct NODE *node, const int p_i)
 
static void init_internal_node (struct NODE *nfreep, struct NODE *parent, int subnode)
 
int get_freenode (int *nnext, struct NodeCache *nc)
 
static int modify_internal_node (int parent, int subnode, int p_toplace, const ForceTree tb, const int HybridNuGrav)
 
static int create_new_node_layer (int firstparent, int p_toplace, const int HybridNuGrav, const ForceTree tb, int *nnext, struct NodeCache *nc)
 
int add_particle_to_tree (int i, int cur_start, const ForceTree tb, const int HybridNuGrav, struct NodeCache *nc, int *nnext)
 
int merge_partial_force_trees (int left, int right, struct NodeCache *nc, int *nnext, const struct ForceTree tb, int HybridNuGrav)
 
int force_tree_create_nodes (const ForceTree tb, const int npart, int mask, DomainDecomp *ddecomp, const int HybridNuGrav)
 
int force_get_father (int no, const ForceTree *tree)
 
static int force_get_sibling (const int sib, const int j, const int *suns)
 
static void force_update_particle_node (int no, const ForceTree *tree)
 
static int force_update_node_recursive (int no, int sib, int level, const ForceTree *tree)
 
void force_update_node_parallel (const ForceTree *tree, const DomainDecomp *ddecomp)
 
void force_update_hmax (int *activeset, int size, ForceTree *tree, DomainDecomp *ddecomp)
 
ForceTree force_treeallocate (int64_t maxnodes, int64_t maxpart, DomainDecomp *ddecomp)
 
void force_tree_free (ForceTree *tree)
 

Variables

static struct forcetree_params ForceTreeParams
 

Detailed Description

gravitational tree

This file contains the computation of the gravitational force by means of a tree. The type of tree implemented is a geometrical oct-tree, starting from a cube encompassing all particles. This cube is automatically found in the ddecomp decomposition, which also splits up the global "top-level" tree along node boundaries, moving the particles of different parts of the tree to separate processors.

Local naming convention: once built it is ForceTree * tree, passed by reference. While the nodes are still being added it is ForceTree tb, passed by value.

Definition in file forcetree.c.

Macro Definition Documentation

◆ NODECACHE_SIZE

#define NODECACHE_SIZE   (8*12)

Definition at line 336 of file forcetree.c.

Function Documentation

◆ add_particle_moment_to_node()

static void add_particle_moment_to_node ( struct NODE pnode,
int  i 
)
static

Definition at line 914 of file forcetree.c.

915 {
916  int k;
917  pnode->mom.mass += (P[i].Mass);
918  for(k=0; k<3; k++)
919  pnode->mom.cofm[k] += (P[i].Mass * P[i].Pos[k]);
920 
921  if(P[i].Type == 0)
922  {
923  int j;
924  /* Maximal distance any of the member particles peek out from the side of the node.
925  * May be at most hmax, as |Pos - Center| < len.*/
926  for(j = 0; j < 3; j++) {
927  pnode->mom.hmax = DMAX(pnode->mom.hmax, fabs(P[i].Pos[j] - pnode->center[j]) + P[i].Hsml - pnode->len);
928  }
929  }
930 }
#define P
Definition: partmanager.h:88
struct NODE::@9 mom
MyFloat center[3]
Definition: forcetree.h:43
MyFloat mass
Definition: forcetree.h:56
MyFloat hmax
Definition: forcetree.h:57
MyFloat cofm[3]
Definition: forcetree.h:55
MyFloat len
Definition: forcetree.h:42
#define DMAX(x, y)
Definition: test_interp.c:11

References NODE::center, NODE::cofm, DMAX, NODE::hmax, NODE::len, NODE::mass, NODE::mom, and P.

Referenced by modify_internal_node().

Here is the caller graph for this function:

◆ add_particle_to_tree()

int add_particle_to_tree ( int  i,
int  cur_start,
const ForceTree  tb,
const int  HybridNuGrav,
struct NodeCache nc,
int *  nnext 
)

Definition at line 487 of file forcetree.c.

488 {
489  int child, nocc;
490  int cur = cur_start;
491  /*Walk the main tree until we get something that isn't an internal node.*/
492  do
493  {
494  /*No lock needed: if we have an internal node here it will be stable*/
495  nocc = tb.Nodes[cur].s.noccupied;
496 
497  /* This node still has space for a particle (or needs conversion)*/
498  if(nocc < (1 << 16))
499  break;
500 
501  /* This node has child subnodes: find them.*/
502  int subnode = get_subnode(&tb.Nodes[cur], i);
503  /*No lock needed: if we have an internal node here it will be stable*/
504  child = tb.Nodes[cur].s.suns[subnode];
505 
506  if(child > tb.lastnode || child < tb.firstnode)
507  endrun(1,"Corruption in tree build: N[%d].[%d] = %d > lastnode (%d)\n",cur, subnode, child, tb.lastnode);
508  cur = child;
509  }
510  while(child >= tb.firstnode);
511 
512  /* We have a guaranteed spot.*/
513  nocc = tb.Nodes[cur].s.noccupied;
514  tb.Nodes[cur].s.noccupied++;
515 
516  /* Now we have something that isn't an internal node. We can place the particle! */
517  if(nocc < NMAXCHILD)
518  modify_internal_node(cur, nocc, i, tb, HybridNuGrav);
519  /* In this case we need to create a new layer of nodes beneath this one*/
520  else if(nocc < 1<<16) {
521  if(create_new_node_layer(cur, i, HybridNuGrav, tb, nnext, nc))
522  return -1;
523  } else
524  endrun(2, "Tried to convert already converted node %d with nocc = %d\n", cur, nocc);
525  return cur;
526 }
void endrun(int where, const char *fmt,...)
Definition: endrun.c:147
int get_subnode(const struct NODE *node, const int p_i)
Definition: forcetree.c:281
static int create_new_node_layer(int firstparent, int p_toplace, const int HybridNuGrav, const ForceTree tb, int *nnext, struct NodeCache *nc)
Definition: forcetree.c:376
static int modify_internal_node(int parent, int subnode, int p_toplace, const ForceTree tb, const int HybridNuGrav)
Definition: forcetree.c:361
#define NMAXCHILD
Definition: forcetree.h:12
struct NODE * Nodes
Definition: forcetree.h:97
int firstnode
Definition: forcetree.h:84
int lastnode
Definition: forcetree.h:86
struct NodeChild s
Definition: forcetree.h:66
int noccupied
Definition: forcetree.h:35
int suns[NMAXCHILD]
Definition: forcetree.h:32

References create_new_node_layer(), endrun(), ForceTree::firstnode, get_subnode(), ForceTree::lastnode, modify_internal_node(), NMAXCHILD, NodeChild::noccupied, ForceTree::Nodes, NODE::s, and NodeChild::suns.

Referenced by force_tree_create_nodes(), and merge_partial_force_trees().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ create_new_node_layer()

static int create_new_node_layer ( int  firstparent,
int  p_toplace,
const int  HybridNuGrav,
const ForceTree  tb,
int *  nnext,
struct NodeCache nc 
)
static

Definition at line 376 of file forcetree.c.

378 {
379  /* This is so we can defer changing
380  * the type of the existing node until the end.*/
381  int parent = firstparent;
382 
383  do {
384  int i;
385  struct NODE *nprnt = &tb.Nodes[parent];
386 
387  /* Braces to scope oldsuns and newsuns*/
388  {
389  int newsuns[NMAXCHILD];
390 
391  int * oldsuns = nprnt->s.suns;
392 
393  /*We have two particles here, so create a new child node to store them both.*/
394  /* if we are here the node must be large enough, thus contain exactly one child. */
395  /* The parent is already a leaf, need to split */
396  /* Get memory for 8 extra nodes from our cache.*/
397  newsuns[0] = get_freenode(nnext, nc);
398  /*If we already have too many nodes, exit loop.*/
399  if(nc->nnext_thread >= tb.lastnode) {
400  /* This means that we have > NMAXCHILD particles in the same place,
401  * which usually indicates a bug in the particle evolution. Print some helpful debug information.*/
402  message(1, "Failed placing %d at %g %g %g, type %d, ID %ld. Others were %d (%g %g %g, t %d ID %ld) and %d (%g %g %g, t %d ID %ld). next %d last %d\n",
403  p_toplace, P[p_toplace].Pos[0], P[p_toplace].Pos[1], P[p_toplace].Pos[2], P[p_toplace].Type, P[p_toplace].ID,
404  oldsuns[0], P[oldsuns[0]].Pos[0], P[oldsuns[0]].Pos[1], P[oldsuns[0]].Pos[2], P[oldsuns[0]].Type, P[oldsuns[0]].ID,
405  oldsuns[1], P[oldsuns[1]].Pos[0], P[oldsuns[1]].Pos[1], P[oldsuns[1]].Pos[2], P[oldsuns[1]].Type, P[oldsuns[1]].ID
406  );
407  nc->nnext_thread = tb.lastnode + 10 * NODECACHE_SIZE;
408  /* If this is not the first layer created,
409  * we need to mark the overall parent as a node node
410  * while marking this one as a particle node */
411  if(firstparent != parent)
412  {
413  nprnt->f.ChildType = PARTICLE_NODE_TYPE;
414  nprnt->s.noccupied = NMAXCHILD;
415  tb.Nodes[firstparent].f.ChildType = NODE_NODE_TYPE;
416  tb.Nodes[firstparent].s.noccupied = (1<<16);
417  }
418  return 1;
419  }
420  for(i=0; i<8; i++) {
421  newsuns[i] = newsuns[0] + i;
422  struct NODE *nfreep = &tb.Nodes[newsuns[i]];
423  /* We create a new leaf node.*/
424  init_internal_node(nfreep, nprnt, i);
425  /*Set father of new node*/
426  nfreep->father = parent;
427  }
428  /*Initialize the remaining entries to empty*/
429  for(i=8; i<NMAXCHILD;i++)
430  newsuns[i] = -1;
431 
432  for(i=0; i < NMAXCHILD; i++) {
433  /* Re-attach each particle to the appropriate new leaf.
434  * Notice that since we have NMAXCHILD slots on each child and NMAXCHILD particles,
435  * we will always have a free slot. */
436  int subnode = get_subnode(nprnt, oldsuns[i]);
437  int child = newsuns[subnode];
438  struct NODE * nchild = &tb.Nodes[child];
439  modify_internal_node(child, nchild->s.noccupied, oldsuns[i], tb, HybridNuGrav);
440  nchild->s.noccupied++;
441  }
442  /* Copy the new node array into the node*/
443  memcpy(nprnt->s.suns, newsuns, NMAXCHILD * sizeof(int));
444  } /* After this brace oldsuns and newsuns are invalid*/
445 
446  /* Set sibling for the new rank. Since empty at this point, point onwards.*/
447  for(i=0; i<7; i++) {
448  int child = nprnt->s.suns[i];
449  struct NODE * nchild = &tb.Nodes[child];
450  nchild->sibling = nprnt->s.suns[i+1];
451  }
452  /* Final child needs special handling: set to the parent's sibling.*/
453  tb.Nodes[nprnt->s.suns[7]].sibling = nprnt->sibling;
454  /* Zero the momenta for the parent*/
455  memset(&nprnt->mom, 0, sizeof(nprnt->mom));
456 
457  /* Now try again to add the new particle*/
458  int subnode = get_subnode(nprnt, p_toplace);
459  int child = nprnt->s.suns[subnode];
460  struct NODE * nchild = &tb.Nodes[child];
461  if(nchild->s.noccupied < NMAXCHILD) {
462  modify_internal_node(child, nchild->s.noccupied, p_toplace, tb, HybridNuGrav);
463  nchild->s.noccupied++;
464  break;
465  }
466  /* The attached particles are already within one subnode of the new node.
467  * Iterate, creating a new layer beneath.*/
468  else {
469  /* The current child is going to have new nodes created beneath it,
470  * so mark it a Node-containing node. It cannot be accessed until
471  * we mark the top-level parent, so no need for atomics.*/
472  tb.Nodes[child].f.ChildType = NODE_NODE_TYPE;
473  tb.Nodes[child].s.noccupied = (1<<16);
474  parent = child;
475  }
476  } while(1);
477 
478  /* A new node is created. Mark the (original) parent as an internal node with node children.
479  * This goes last so that we don't access the child before it is constructed.*/
480  tb.Nodes[firstparent].f.ChildType = NODE_NODE_TYPE;
481  tb.Nodes[firstparent].s.noccupied = (1<<16);
482  return 0;
483 }
void message(int where, const char *fmt,...)
Definition: endrun.c:175
static void init_internal_node(struct NODE *nfreep, struct NODE *parent, int subnode)
Definition: forcetree.c:304
int get_freenode(int *nnext, struct NodeCache *nc)
Definition: forcetree.c:345
#define NODECACHE_SIZE
Definition: forcetree.c:336
#define NODE_NODE_TYPE
Definition: forcetree.h:16
#define PARTICLE_NODE_TYPE
Definition: forcetree.h:15
Definition: forcetree.h:39
int sibling
Definition: forcetree.h:40
unsigned int ChildType
Definition: forcetree.h:49
int father
Definition: forcetree.h:41
struct NODE::@8 f
int nnext_thread
Definition: forcetree.c:340

References NODE::ChildType, NODE::f, NODE::father, get_freenode(), get_subnode(), init_internal_node(), ForceTree::lastnode, message(), modify_internal_node(), NODE::mom, NMAXCHILD, NodeCache::nnext_thread, NodeChild::noccupied, NODE_NODE_TYPE, NODECACHE_SIZE, ForceTree::Nodes, P, PARTICLE_NODE_TYPE, NODE::s, NODE::sibling, and NodeChild::suns.

Referenced by add_particle_to_tree().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_create_node_for_topnode()

void force_create_node_for_topnode ( int  no,
int  topnode,
struct NODE Nodes,
const DomainDecomp ddecomp,
int  bits,
int  x,
int  y,
int  z,
int *  nextfree,
const int  lastnode 
)
static

This function recursively creates a set of empty tree nodes which corresponds to the top-level tree for the ddecomp grid. This is done to ensure that this top-level tree is always "complete" so that we can easily associate the pseudo-particles of other CPUs with tree-nodes at a given level in the tree, even when the particle population is so sparse that some of these nodes are actually empty.

Definition at line 818 of file forcetree.c.

819 {
820  int i, j, k;
821 
822  /*We reached the leaf of the toptree*/
823  if(ddecomp->TopNodes[topnode].Daughter < 0)
824  return;
825 
826  for(i = 0; i < 2; i++)
827  for(j = 0; j < 2; j++)
828  for(k = 0; k < 2; k++)
829  {
830  int sub = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits);
831 
832  int count = i + 2 * j + 4 * k;
833 
834  Nodes[no].s.Types = 0;
835  Nodes[no].s.suns[count] = *nextfree;
836  /*We are an internal top level node as we now have a child top level.*/
837  Nodes[no].f.InternalTopLevel = 1;
838  Nodes[no].f.ChildType = NODE_NODE_TYPE;
839  Nodes[no].s.noccupied = (1<<16);
840 
841  /* We create a new leaf node.*/
842  init_internal_node(&Nodes[*nextfree], &Nodes[no], count);
843  /*Set father of new node*/
844  Nodes[*nextfree].father = no;
845  /*All nodes here are top level nodes*/
846  Nodes[*nextfree].f.TopLevel = 1;
847 
848  const struct topnode_data curtopnode = ddecomp->TopNodes[ddecomp->TopNodes[topnode].Daughter + sub];
849  if(curtopnode.Daughter == -1) {
850  ddecomp->TopLeaves[curtopnode.Leaf].treenode = *nextfree;
851  }
852 
853  (*nextfree)++;
854 
855  if(*nextfree >= lastnode)
856  endrun(11, "Not enough force nodes to topnode grid: need %d\n",lastnode);
857  }
858  /* Set sibling on the child*/
859  for(j=0; j<7; j++) {
860  int chld = Nodes[no].s.suns[j];
861  Nodes[chld].sibling = Nodes[no].s.suns[j+1];
862  }
863  Nodes[Nodes[no].s.suns[7]].sibling = Nodes[no].sibling;
864  for(i = 0; i < 2; i++)
865  for(j = 0; j < 2; j++)
866  for(k = 0; k < 2; k++)
867  {
868  int sub = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits);
869  int count = i + 2 * j + 4 * k;
870  force_create_node_for_topnode(Nodes[no].s.suns[count], ddecomp->TopNodes[topnode].Daughter + sub, Nodes, ddecomp,
871  bits + 1, 2 * x + i, 2 * y + j, 2 * z + k, nextfree, lastnode);
872  }
873 
874 }
static void force_create_node_for_topnode(int no, int topnode, struct NODE *Nodes, const DomainDecomp *ddecomp, int bits, int x, int y, int z, int *nextfree, const int lastnode)
Definition: forcetree.c:818
peano_t peano_hilbert_key(int x, int y, int z, int bits)
Definition: peano.c:119
struct topnode_data * TopNodes
Definition: domain.h:36
struct topleaf_data * TopLeaves
Definition: domain.h:37
unsigned int TopLevel
Definition: forcetree.h:47
unsigned int InternalTopLevel
Definition: forcetree.h:46
unsigned int Types
Definition: forcetree.h:30
int treenode
Definition: domain.h:24
int Leaf
Definition: domain.h:17
int Daughter
Definition: domain.h:15

References NODE::ChildType, topnode_data::Daughter, endrun(), NODE::f, NODE::father, init_internal_node(), NODE::InternalTopLevel, topnode_data::Leaf, NodeChild::noccupied, NODE_NODE_TYPE, peano_hilbert_key(), NODE::s, NODE::sibling, NodeChild::suns, DomainDecomp::TopLeaves, NODE::TopLevel, DomainDecomp::TopNodes, topleaf_data::treenode, and NodeChild::Types.

Referenced by force_tree_create_nodes().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_exchange_pseudodata()

void force_exchange_pseudodata ( ForceTree tree,
const DomainDecomp ddecomp 
)
static

This function communicates the values of the multipole moments of the top-level tree-nodes of the ddecomp grid. This data can then be used to update the pseudo-particles on each CPU accordingly.

Definition at line 1109 of file forcetree.c.

1110 {
1111  int NTask, ThisTask;
1112  int i, no, ta, recvTask;
1113  int *recvcounts, *recvoffset;
1114  struct topleaf_momentsdata
1115  {
1116  MyFloat s[3];
1117  MyFloat mass;
1118  MyFloat hmax;
1119  }
1120  *TopLeafMoments;
1121 
1122  MPI_Comm_size(MPI_COMM_WORLD, &NTask);
1123  MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
1124 
1125  TopLeafMoments = (struct topleaf_momentsdata *) mymalloc("TopLeafMoments", ddecomp->NTopLeaves * sizeof(TopLeafMoments[0]));
1126  memset(&TopLeafMoments[0], 0, sizeof(TopLeafMoments[0]) * ddecomp->NTopLeaves);
1127 
1128  for(i = ddecomp->Tasks[ThisTask].StartLeaf; i < ddecomp->Tasks[ThisTask].EndLeaf; i ++) {
1129  no = ddecomp->TopLeaves[i].treenode;
1130  if(ddecomp->TopLeaves[i].Task != ThisTask)
1131  endrun(131231231, "TopLeaf %d Task table is corrupted: task is %d\n", i, ddecomp->TopLeaves[i].Task);
1132 
1133  /* read out the multipole moments from the local base cells */
1134  TopLeafMoments[i].s[0] = tree->Nodes[no].mom.cofm[0];
1135  TopLeafMoments[i].s[1] = tree->Nodes[no].mom.cofm[1];
1136  TopLeafMoments[i].s[2] = tree->Nodes[no].mom.cofm[2];
1137  TopLeafMoments[i].mass = tree->Nodes[no].mom.mass;
1138  TopLeafMoments[i].hmax = tree->Nodes[no].mom.hmax;
1139 
1140  /*Set the local base nodes dependence on local mass*/
1141  while(no >= 0)
1142  {
1143 #ifdef DEBUG
1144  if(tree->Nodes[no].f.ChildType == PSEUDO_NODE_TYPE)
1145  endrun(333, "Pseudo node %d parent of a leaf on this processor %d\n", no, ddecomp->TopLeaves[i].treenode);
1146 #endif
1147  if(tree->Nodes[no].f.DependsOnLocalMass)
1148  break;
1149 
1150  tree->Nodes[no].f.DependsOnLocalMass = 1;
1151 
1152  no = tree->Nodes[no].father;
1153  }
1154  }
1155 
1156  /* share the pseudo-particle data across CPUs */
1157 
1158  recvcounts = (int *) mymalloc("recvcounts", sizeof(int) * NTask);
1159  recvoffset = (int *) mymalloc("recvoffset", sizeof(int) * NTask);
1160 
1161  for(recvTask = 0; recvTask < NTask; recvTask++)
1162  {
1163  recvoffset[recvTask] = ddecomp->Tasks[recvTask].StartLeaf * sizeof(TopLeafMoments[0]);
1164  recvcounts[recvTask] = (ddecomp->Tasks[recvTask].EndLeaf - ddecomp->Tasks[recvTask].StartLeaf) * sizeof(TopLeafMoments[0]);
1165  }
1166 
1167  MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
1168  &TopLeafMoments[0], recvcounts, recvoffset,
1169  MPI_BYTE, MPI_COMM_WORLD);
1170 
1171  myfree(recvoffset);
1172  myfree(recvcounts);
1173 
1174 
1175  for(ta = 0; ta < NTask; ta++) {
1176  if(ta == ThisTask) continue; /* bypass ThisTask since it is already up to date */
1177 
1178  for(i = ddecomp->Tasks[ta].StartLeaf; i < ddecomp->Tasks[ta].EndLeaf; i ++) {
1179  no = ddecomp->TopLeaves[i].treenode;
1180 
1181  tree->Nodes[no].mom.cofm[0] = TopLeafMoments[i].s[0];
1182  tree->Nodes[no].mom.cofm[1] = TopLeafMoments[i].s[1];
1183  tree->Nodes[no].mom.cofm[2] = TopLeafMoments[i].s[2];
1184  tree->Nodes[no].mom.mass = TopLeafMoments[i].mass;
1185  tree->Nodes[no].mom.hmax = TopLeafMoments[i].hmax;
1186  }
1187  }
1188  myfree(TopLeafMoments);
1189 }
#define PSEUDO_NODE_TYPE
Definition: forcetree.h:17
#define mymalloc(name, size)
Definition: mymalloc.h:15
#define myfree(x)
Definition: mymalloc.h:19
struct task_data * Tasks
Definition: domain.h:41
int NTopLeaves
Definition: domain.h:40
unsigned int DependsOnLocalMass
Definition: forcetree.h:48
int EndLeaf
Definition: domain.h:30
int StartLeaf
Definition: domain.h:29
int Task
Definition: domain.h:21
int ThisTask
Definition: test_exchange.c:23
int NTask
Definition: test_exchange.c:23
LOW_PRECISION MyFloat
Definition: types.h:19

References NODE::ChildType, NODE::cofm, NODE::DependsOnLocalMass, task_data::EndLeaf, endrun(), NODE::f, NODE::father, NODE::hmax, NODE::mass, NODE::mom, myfree, mymalloc, ForceTree::Nodes, NTask, DomainDecomp::NTopLeaves, PSEUDO_NODE_TYPE, task_data::StartLeaf, topleaf_data::Task, DomainDecomp::Tasks, ThisTask, DomainDecomp::TopLeaves, and topleaf_data::treenode.

Referenced by force_tree_build().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_get_father()

int force_get_father ( int  no,
const ForceTree tree 
)

Definition at line 905 of file forcetree.c.

906 {
907  if(no >= tree->firstnode)
908  return tree->Nodes[no].father;
909  else
910  return tree->Father[no];
911 }
int * Father
Definition: forcetree.h:103

References NODE::father, ForceTree::Father, ForceTree::firstnode, and ForceTree::Nodes.

Referenced by _prepare(), check_moments(), check_tree(), effhsml(), force_tree_eh_slots_fork(), ngb_treefind_threads(), set_init_hsml(), and setup_smoothinglengths().

Here is the caller graph for this function:

◆ force_get_sibling()

static int force_get_sibling ( const int  sib,
const int  j,
const int *  suns 
)
static

Definition at line 934 of file forcetree.c.

935 {
936  /* check if we have a sibling on the same level */
937  int jj;
938  int nextsib = sib;
939  for(jj = j + 1; jj < 8; jj++) {
940  if(suns[jj] >= 0) {
941  nextsib = suns[jj];
942  break;
943  }
944  }
945  return nextsib;
946 }

Referenced by force_update_node_recursive().

Here is the caller graph for this function:

◆ force_insert_pseudo_particles()

static void force_insert_pseudo_particles ( const ForceTree tree,
const DomainDecomp ddecomp 
)
static

this function inserts pseudo-particles which will represent the mass distribution of the other CPUs. Initially, the mass of the pseudo-particles is set to zero, and their coordinate is set to the center of the ddecomp-cell they correspond to. These quantities will be updated later on.

Definition at line 883 of file forcetree.c.

884 {
885  int i, index;
886  const int firstpseudo = tree->lastnode;
887  int ThisTask;
888  MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
889 
890  for(i = 0; i < ddecomp->NTopLeaves; i++)
891  {
892  index = ddecomp->TopLeaves[i].treenode;
893  if(ddecomp->TopLeaves[i].Task != ThisTask) {
894  if(tree->Nodes[index].s.noccupied != 0)
895  endrun(5, "In node %d, overwriting %d child particles (i = %d etc) with pseudo particle %d\n",
896  index, tree->Nodes[index].s.noccupied, tree->Nodes[index].s.suns[0], i);
897  tree->Nodes[index].f.ChildType = PSEUDO_NODE_TYPE;
898  /* This node points to the pseudo particle*/
899  tree->Nodes[index].s.suns[0] = firstpseudo + i;
900  }
901  }
902 }

References NODE::ChildType, endrun(), NODE::f, ForceTree::lastnode, NodeChild::noccupied, ForceTree::Nodes, DomainDecomp::NTopLeaves, PSEUDO_NODE_TYPE, NODE::s, NodeChild::suns, topleaf_data::Task, ThisTask, DomainDecomp::TopLeaves, and topleaf_data::treenode.

Referenced by force_tree_build().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_tree_allocated()

int force_tree_allocated ( const ForceTree tree)

Definition at line 134 of file forcetree.c.

135 {
136  return tree->tree_allocated_flag;
137 }
int tree_allocated_flag
Definition: forcetree.h:78

References ForceTree::tree_allocated_flag.

Referenced by _prepare(), force_tree_free(), force_tree_rebuild(), force_tree_rebuild_mask(), sfr_reserve_slots(), and treewalk_run().

Here is the caller graph for this function:

◆ force_tree_build()

ForceTree force_tree_build ( int  npart,
int  mask,
DomainDecomp ddecomp,
const int  HybridNuGrav,
const int  DoMoments,
const char *  EmergencyOutputDir 
)
static

Constructs the gravitational oct-tree.

The index convention for accessing tree nodes is the following: the indices 0...NumPart-1 reference single particles, the indices firstnode....firstnode +nodes-1 reference tree nodes. ‘Nodes_base’ points to the first tree node, while ‘nodes’ is shifted such that nodes[firstnode] gives the first tree node. Finally, node indices with values 'tb.lastnode' and larger indicate "pseudo particles", i.e. multipole moments of top-level nodes that lie on different CPUs. If such a node needs to be opened, the corresponding particle must be exported to that CPU.

Definition at line 191 of file forcetree.c.

192 {
193  ForceTree tree;
194 
195  int TooManyNodes = 0;
196  int64_t maxnodes = ForceTreeParams.TreeAllocFactor * PartManager->NumPart + ddecomp->NTopNodes;
197  int64_t maxmaxnodes;
198  MPI_Reduce(&maxnodes, &maxmaxnodes, 1, MPI_INT64, MPI_MAX,0, MPI_COMM_WORLD);
199  message(0, "Treebuild: Largest is %g MByte for %ld tree nodes. firstnode %ld. (presently allocated %g MB)\n",
200  maxmaxnodes * sizeof(struct NODE) / (1024.0 * 1024.0), maxmaxnodes, PartManager->MaxPart,
201  mymalloc_usedbytes() / (1024.0 * 1024.0));
202 
203  do
204  {
205  /* Allocate memory. */
206  tree = force_treeallocate(maxnodes, PartManager->MaxPart, ddecomp);
207 
208  tree.BoxSize = PartManager->BoxSize;
209  tree.numnodes = force_tree_create_nodes(tree, npart, mask, ddecomp, HybridNuGrav);
210  if(tree.numnodes >= tree.lastnode - tree.firstnode)
211  {
212  message(1, "Not enough tree nodes (%ld) for %d particles. Created %d\n", maxnodes, npart, tree.numnodes);
213  force_tree_free(&tree);
215  message(1, "TreeAllocFactor from %g to %g now %ld tree nodes\n", ForceTreeParams.TreeAllocFactor, ForceTreeParams.TreeAllocFactor*1.15, maxnodes);
217  if(ForceTreeParams.TreeAllocFactor > 3.0) {
218  TooManyNodes = 1;
219  break;
220  }
221  }
222  }
223  while(tree.numnodes >= tree.lastnode - tree.firstnode);
224 
225  tree.mask = mask;
226 
227  if(MPIU_Any(TooManyNodes, MPI_COMM_WORLD)) {
228  /* Assume scale factor = 1 for dump as position is not affected.*/
229  if(EmergencyOutputDir) {
230  Cosmology CP = {0};
231  CP.Omega0 = 0.3;
232  CP.OmegaLambda = 0.7;
233  CP.HubbleParam = 0.7;
234  dump_snapshot("FORCETREE-DUMP", 1, &CP, EmergencyOutputDir);
235  }
236  endrun(2, "Required too many nodes, snapshot dumped\n");
237  }
238  if(mask == ALLMASK)
239  walltime_measure("/Tree/Build/Nodes");
240 #ifdef DEBUG
241  force_validate_nextlist(&tree);
242 #endif
243  /* insert the pseudo particles that represent the mass distribution of other ddecomps */
244  force_insert_pseudo_particles(&tree, ddecomp);
245 #ifdef DEBUG
246  force_validate_nextlist(&tree);
247 #endif
248 
249  tree.moments_computed_flag = 0;
250 
251  if(DoMoments) {
252  /* now compute the multipole moments recursively */
253  force_update_node_parallel(&tree, ddecomp);
254 #ifdef DEBUG
255  force_validate_nextlist(&tree);
256 #endif
257 
258  /* Exchange the pseudo-data*/
259  force_exchange_pseudodata(&tree, ddecomp);
260 
262  tree.moments_computed_flag = 1;
263  tree.hmax_computed_flag = 1;
264  }
265  tree.Nodes_base = (struct NODE *) myrealloc(tree.Nodes_base, (tree.numnodes +1) * sizeof(struct NODE));
266 
267  /*Update the oct-tree struct so it knows about the memory change*/
268  tree.Nodes = tree.Nodes_base - tree.firstnode;
269 #ifdef DEBUG
270  force_validate_nextlist(&tree);
271 #endif
272  return tree;
273 }
void dump_snapshot(const char *dump, const double Time, const Cosmology *CP, const char *OutputDir)
Definition: checkpoint.c:52
static void force_treeupdate_pseudos(int no, const ForceTree *tree)
Definition: forcetree.c:1195
int force_tree_create_nodes(const ForceTree tb, const int npart, int mask, DomainDecomp *ddecomp, const int HybridNuGrav)
Definition: forcetree.c:661
ForceTree force_treeallocate(int64_t maxnodes, int64_t maxpart, DomainDecomp *ddecomp)
Definition: forcetree.c:1381
void force_update_node_parallel(const ForceTree *tree, const DomainDecomp *ddecomp)
Definition: forcetree.c:1081
static void force_exchange_pseudodata(ForceTree *tree, const DomainDecomp *ddecomp)
Definition: forcetree.c:1109
void force_tree_free(ForceTree *tree)
Definition: forcetree.c:1409
static void force_insert_pseudo_particles(const ForceTree *tree, const DomainDecomp *ddecomp)
Definition: forcetree.c:883
static struct forcetree_params ForceTreeParams
#define ALLMASK
Definition: forcetree.h:20
#define myrealloc(ptr, size)
Definition: mymalloc.h:18
#define mymalloc_usedbytes()
Definition: mymalloc.h:32
struct part_manager_type PartManager[1]
Definition: partmanager.c:11
static Cosmology * CP
Definition: power.c:27
double Omega0
Definition: cosmology.h:10
double HubbleParam
Definition: cosmology.h:20
double OmegaLambda
Definition: cosmology.h:14
int NTopNodes
Definition: domain.h:39
int moments_computed_flag
Definition: forcetree.h:82
double BoxSize
Definition: forcetree.h:106
int mask
Definition: forcetree.h:90
struct NODE * Nodes_base
Definition: forcetree.h:101
int hmax_computed_flag
Definition: forcetree.h:80
int numnodes
Definition: forcetree.h:88
double TreeAllocFactor
Definition: forcetree.c:37
int MPIU_Any(int condition, MPI_Comm comm)
Definition: system.c:545
#define MPI_INT64
Definition: system.h:12
#define walltime_measure(name)
Definition: walltime.h:8

References ALLMASK, ForceTree::BoxSize, part_manager_type::BoxSize, CP, dump_snapshot(), endrun(), ForceTree::firstnode, force_exchange_pseudodata(), force_insert_pseudo_particles(), force_tree_create_nodes(), force_tree_free(), force_treeallocate(), force_treeupdate_pseudos(), force_update_node_parallel(), ForceTreeParams, ForceTree::hmax_computed_flag, Cosmology::HubbleParam, ForceTree::lastnode, ForceTree::mask, part_manager_type::MaxPart, message(), ForceTree::moments_computed_flag, MPI_INT64, MPIU_Any(), mymalloc_usedbytes, myrealloc, ForceTree::Nodes, ForceTree::Nodes_base, DomainDecomp::NTopNodes, ForceTree::numnodes, part_manager_type::NumPart, Cosmology::Omega0, Cosmology::OmegaLambda, PartManager, forcetree_params::TreeAllocFactor, and walltime_measure.

Referenced by force_tree_rebuild(), and force_tree_rebuild_mask().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_tree_create_nodes()

int force_tree_create_nodes ( const ForceTree  tb,
const int  npart,
int  mask,
DomainDecomp ddecomp,
const int  HybridNuGrav 
)

Does initial creation of the nodes for the gravitational oct-tree. mask is a bitfield: Only types whose bit is set are added.

Definition at line 661 of file forcetree.c.

662 {
663  int nnext = tb.firstnode; /* index of first free node */
664 
665  /* create an empty root node */
666  {
667  int i;
668  struct NODE *nfreep = &tb.Nodes[nnext]; /* select first node */
669 
670  nfreep->len = PartManager->BoxSize*1.001;
671  for(i = 0; i < 3; i++)
672  nfreep->center[i] = PartManager->BoxSize/2.;
673  for(i = 0; i < NMAXCHILD; i++)
674  nfreep->s.suns[i] = -1;
675  nfreep->s.Types = 0;
676  nfreep->s.noccupied = 0;
677  nfreep->father = -1;
678  nfreep->sibling = -1;
679  nfreep->f.TopLevel = 1;
680  nfreep->f.InternalTopLevel = 0;
681  nfreep->f.DependsOnLocalMass = 0;
682  nfreep->f.ChildType = PARTICLE_NODE_TYPE;
683  nfreep->f.unused = 0;
684  memset(&(nfreep->mom.cofm),0,3*sizeof(MyFloat));
685  nfreep->mom.mass = 0;
686  nfreep->mom.hmax = 0;
687  nnext++;
688  /* create a set of empty nodes corresponding to the top-level ddecomp
689  * grid. We need to generate these nodes first to make sure that we have a
690  * complete top-level tree which allows the easy insertion of the
691  * pseudo-particles in the right place */
692  force_create_node_for_topnode(tb.firstnode, 0, tb.Nodes, ddecomp, 1, 0, 0, 0, &nnext, tb.lastnode);
693  }
694  /* Set up thread-local copies of the topnodes to anchor the subtrees. */
695  int ThisTask, j, t;
696  MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
697  const int StartLeaf = ddecomp->Tasks[ThisTask].StartLeaf;
698  const int EndLeaf = ddecomp->Tasks[ThisTask].EndLeaf;
699  const int nthr = omp_get_max_threads();
700  int * topnodes = ta_malloc("topnodes", int, (EndLeaf - StartLeaf) * nthr);
701  /* Topnodes for each thread. For tid 0, just use the real tree. Saves copying the tree back.*/
702  for(j = 0; j < EndLeaf - StartLeaf; j++)
703  topnodes[j] = ddecomp->TopLeaves[j + StartLeaf].treenode;
704  /* Other threads need a copy*/
705  for(t = 1; t < nthr; t++) {
706  for(j = 0; j < EndLeaf - StartLeaf; j++) {
707  /* Make a local copy*/
708  topnodes[j + t * (EndLeaf - StartLeaf)] = nnext;
709  memmove(&tb.Nodes[nnext], &tb.Nodes[topnodes[j]], sizeof(struct NODE));
710  nnext++;
711  }
712  }
713 
714 /* double tstart = second(); */
715  const int first_free = nnext;
716  /* Increment nnext for the threads we are about to initialise.*/
717  nnext += NODECACHE_SIZE * nthr;
718  /* now we insert all particles */
719  #pragma omp parallel
720  {
721  int i;
722  /* Local topnodes*/
723  int tid = omp_get_thread_num();
724  const int * const local_topnodes = topnodes + tid * (EndLeaf - StartLeaf);
725 
726  /* This implements a small thread-local free Node cache.
727  * The cache ensures that Nodes from the same (or close) particles
728  * are created close to each other on the Node list and thus
729  * helps cache locality. I tried each thread getting a separate
730  * part of the tree, and it wasted too much memory. */
731  struct NodeCache nc;
732  nc.nnext_thread = first_free + tid * NODECACHE_SIZE;
733  nc.nrem_thread = NODECACHE_SIZE;
734 
735  /* Stores the last-seen node on this thread.
736  * Since most particles are close to each other, this should save a number of tree walks.*/
737  int this_acc = local_topnodes[0];
738  // message(1, "Topnodes %d real %d\n", local_topnodes[0], topnodes[0]);
739 
740  /* The default schedule is static with a chunk 1/4 the total.
741  * However, particles are sorted by type and then by peano order.
742  * Since we need to merge trees, it is advantageous to have all particles
743  * spatially close be processed by the same thread. This means that the threads should
744  * process particles at a constant offset from the start of the type.
745  * We do this with a static schedule. */
746  int chnksz = PartManager->NumPart/nthr;
747  if(SlotsManager->info[0].enabled && SlotsManager->info[0].size > 0)
748  chnksz = SlotsManager->info[0].size/nthr;
749  if(chnksz < 1000)
750  chnksz = 1000;
751  #pragma omp for schedule(static, chnksz)
752  for(i = 0; i < npart; i++)
753  {
754  /*Can't break from openmp for*/
755  if(nc.nnext_thread >= tb.lastnode)
756  continue;
757 
758  /* Do not add types that do not have their mask bit set.*/
759  if(!((1<<P[i].Type) & mask)) {
760  continue;
761  }
762  /* Do not add garbage/swallowed particles to the tree*/
763  if(P[i].IsGarbage || (P[i].Swallowed && P[i].Type==5))
764  continue;
765 
766  if(P[i].Mass == 0)
767  endrun(12, "Zero mass particle %d type %d id %ld pos %g %g %g\n", i, P[i].Type, P[i].ID, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]);
768  /*First find the Node for the TopLeaf */
769  int cur;
770  if(inside_node(&tb.Nodes[this_acc], i)) {
771  cur = this_acc;
772  } else {
773  /* Get the topnode to which a particle belongs. Each local tree
774  * has a local set of treenodes copying the global topnodes, except tid 0
775  * which has the real topnodes.*/
776  const int topleaf = domain_get_topleaf(P[i].Key, ddecomp);
777  //int treenode = ddecomp->TopLeaves[topleaf].treenode;
778  cur = local_topnodes[topleaf - StartLeaf];
779  }
780 
781  this_acc = add_particle_to_tree(i, cur, tb, HybridNuGrav, &nc, &nnext);
782  }
783  /* The implicit omp-barrier is important here!*/
784 /* double tend = second(); */
785 /* message(0, "Initial insertion: %.3g ms. First node %d\n", (tend - tstart)*1000, local_topnodes[0]); */
786 
787  /* Merge each topnode separately, using a for loop.
788  * This wastes threads if NTHREAD > NTOPNODES, but it
789  * means only one merge is done per subtree and
790  * it requires no locking.*/
791  #pragma omp for schedule(static, 1)
792  for(i = 0; i < EndLeaf - StartLeaf; i++) {
793  int t;
794  /* These are the addresses of the real topnodes*/
795  const int target = topnodes[i];
796  if(nc.nnext_thread >= tb.lastnode)
797  continue;
798  for(t = 1; t < nthr; t++) {
799  const int righttop = topnodes[i + t * (EndLeaf - StartLeaf)];
800 // message(1, "tid = %d i = %d t = %d Merging %d to %d addresses are %lx - %lx end is %lx\n", omp_get_thread_num(), i, t, righttop, target, &tb.Nodes[righttop], &tb.Nodes[target], &tb.Nodes[nnext]);
801  if(merge_partial_force_trees(target, righttop, &nc, &nnext, tb, HybridNuGrav))
802  break;
803  }
804  }
805  }
806 
807  ta_free(topnodes);
808  return nnext - tb.firstnode;
809 }
static int domain_get_topleaf(const peano_t key, const DomainDecomp *ddecomp)
Definition: domain.h:74
int merge_partial_force_trees(int left, int right, struct NodeCache *nc, int *nnext, const struct ForceTree tb, int HybridNuGrav)
Definition: forcetree.c:533
int add_particle_to_tree(int i, int cur_start, const ForceTree tb, const int HybridNuGrav, struct NodeCache *nc, int *nnext)
Definition: forcetree.c:487
static int inside_node(const struct NODE *node, const int p_i)
Definition: forcetree.c:291
#define ta_malloc(name, type, nele)
Definition: mymalloc.h:25
#define ta_free(p)
Definition: mymalloc.h:28
struct slots_manager_type SlotsManager[1]
Definition: slotsmanager.c:7
unsigned int unused
Definition: forcetree.h:51
int64_t size
Definition: slotsmanager.h:12
struct slot_info info[6]
Definition: slotsmanager.h:112

References add_particle_to_tree(), part_manager_type::BoxSize, NODE::center, NODE::ChildType, NODE::cofm, NODE::DependsOnLocalMass, domain_get_topleaf(), slot_info::enabled, task_data::EndLeaf, endrun(), NODE::f, NODE::father, ForceTree::firstnode, force_create_node_for_topnode(), NODE::hmax, slots_manager_type::info, inside_node(), NODE::InternalTopLevel, ForceTree::lastnode, NODE::len, NODE::mass, merge_partial_force_trees(), NODE::mom, NMAXCHILD, NodeCache::nnext_thread, NodeChild::noccupied, NODECACHE_SIZE, ForceTree::Nodes, NodeCache::nrem_thread, part_manager_type::NumPart, P, PARTICLE_NODE_TYPE, PartManager, NODE::s, NODE::sibling, slot_info::size, SlotsManager, task_data::StartLeaf, NodeChild::suns, ta_free, ta_malloc, DomainDecomp::Tasks, ThisTask, DomainDecomp::TopLeaves, NODE::TopLevel, topleaf_data::treenode, NodeChild::Types, and NODE::unused.

Referenced by do_tree_test(), and force_tree_build().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_tree_eh_slots_fork()

static int force_tree_eh_slots_fork ( EIBase event,
void *  userdata 
)
static

Definition at line 110 of file forcetree.c.

111 {
112  /* after a fork, we will attach the new particle to the force tree. */
113  EISlotsFork * ev = (EISlotsFork*) event;
114  int parent = ev->parent;
115  int child = ev->child;
116  ForceTree * tree = (ForceTree * ) userdata;
117  int no = force_get_father(parent, tree);
118  struct NODE * nop = &tree->Nodes[no];
119  /* FIXME: We lose particles if the node is full.
120  * At the moment this does not matter, because
121  * the only new particles are stars, which do not
122  * participate in the SPH tree walk.*/
123  if(nop->s.noccupied < NMAXCHILD) {
124  nop->s.suns[nop->s.noccupied] = child;
125  nop->s.Types += P[child].Type << (3*nop->s.noccupied);
126  nop->s.noccupied++;
127  }
128  if(child < tree->nfather && child >= 0)
129  tree->Father[child] = no;
130  return 0;
131 }
int force_get_father(int no, const ForceTree *tree)
Definition: forcetree.c:905
int64_t child
Definition: slotsmanager.h:151

References EISlotsFork::child, ForceTree::Father, force_get_father(), NMAXCHILD, NodeChild::noccupied, ForceTree::Nodes, P, EISlotsFork::parent, NODE::s, NodeChild::suns, and NodeChild::Types.

Referenced by force_tree_free(), and force_tree_rebuild().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_tree_free()

void force_tree_free ( ForceTree tree)

This function frees the memory allocated for the tree, i.e. it frees the space allocated by the function force_treeallocate().

Definition at line 1409 of file forcetree.c.

1410 {
1412 
1413  if(!force_tree_allocated(tree))
1414  return;
1415  myfree(tree->Nodes_base);
1416  myfree(tree->Father);
1417  tree->tree_allocated_flag = 0;
1418 }
int event_unlisten(EventSpec *eh, eventfunc func, void *userdata)
Definition: event.c:26
EventSpec EventSlotsFork
Definition: event.c:54
int force_tree_allocated(const ForceTree *tree)
Definition: forcetree.c:134
static int force_tree_eh_slots_fork(EIBase *event, void *userdata)
Definition: forcetree.c:110

References event_unlisten(), EventSlotsFork, ForceTree::Father, force_tree_allocated(), force_tree_eh_slots_fork(), myfree, ForceTree::Nodes_base, and ForceTree::tree_allocated_flag.

Referenced by _prepare(), do_density_test(), do_force_test(), fof_fof(), force_tree_build(), force_tree_rebuild(), force_tree_rebuild_mask(), metal_return(), run(), run_gravity_test(), runfof(), runpower(), setup_smoothinglengths(), test_rebuild_close(), test_rebuild_flat(), test_rebuild_random(), and turn_on_quasars().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_tree_rebuild()

void force_tree_rebuild ( ForceTree tree,
DomainDecomp ddecomp,
const int  HybridNuGrav,
const int  DoMoments,
const char *  EmergencyOutputDir 
)

Definition at line 140 of file forcetree.c.

141 {
142  MPIU_Barrier(MPI_COMM_WORLD);
143  message(0, "Tree construction. (presently allocated=%g MB)\n", mymalloc_usedbytes() / (1024.0 * 1024.0));
144 
145  if(force_tree_allocated(tree)) {
146  force_tree_free(tree);
147  }
148  walltime_measure("/Misc");
149 
150  *tree = force_tree_build(PartManager->NumPart, ALLMASK, ddecomp, HybridNuGrav, DoMoments, EmergencyOutputDir);
151 
153  walltime_measure("/Tree/Build/Moments");
154 
155  message(0, "Tree constructed (moments: %d). First node %d, number of nodes %d, first pseudo %d. NTopLeaves %d\n",
156  tree->moments_computed_flag, tree->firstnode, tree->numnodes, tree->lastnode, tree->NTopLeaves);
157  MPIU_Barrier(MPI_COMM_WORLD);
158 }
int event_listen(EventSpec *eh, eventfunc func, void *userdata)
Definition: event.c:6
static ForceTree force_tree_build(int npart, int mask, DomainDecomp *ddecomp, const int HybridNuGrav, const int DoMoments, const char *EmergencyOutputDir)
Definition: forcetree.c:191
int NTopLeaves
Definition: forcetree.h:94
#define MPIU_Barrier(comm)
Definition: system.h:103

References ALLMASK, event_listen(), EventSlotsFork, ForceTree::firstnode, force_tree_allocated(), force_tree_build(), force_tree_eh_slots_fork(), force_tree_free(), ForceTree::lastnode, message(), ForceTree::moments_computed_flag, MPIU_Barrier, mymalloc_usedbytes, ForceTree::NTopLeaves, ForceTree::numnodes, part_manager_type::NumPart, PartManager, and walltime_measure.

Referenced by do_density_test(), do_force_test(), run(), run_gravity_test(), runfof(), runpower(), and setup_smoothinglengths().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_tree_rebuild_mask()

void force_tree_rebuild_mask ( ForceTree tree,
DomainDecomp ddecomp,
int  mask,
const int  HybridNuGrav,
const char *  EmergencyOutputDir 
)

Definition at line 161 of file forcetree.c.

162 {
163  MPIU_Barrier(MPI_COMM_WORLD);
164  message(0, "Tree construction for types: %d.\n", mask);
165 
166  if(force_tree_allocated(tree)) {
167  force_tree_free(tree);
168  }
169  walltime_measure("/Misc");
170 
171  /* No moments*/
172  *tree = force_tree_build(PartManager->NumPart, mask, ddecomp, HybridNuGrav, 0, EmergencyOutputDir);
173  walltime_measure("/SPH/Build");
174 
175  message(0, "Tree constructed (type mask: %d). First node %d, number of nodes %d, first pseudo %d. NTopLeaves %d\n",
176  mask, tree->firstnode, tree->numnodes, tree->lastnode, tree->NTopLeaves);
177  MPIU_Barrier(MPI_COMM_WORLD);
178 }

References ForceTree::firstnode, force_tree_allocated(), force_tree_build(), force_tree_free(), ForceTree::lastnode, message(), MPIU_Barrier, ForceTree::NTopLeaves, ForceTree::numnodes, part_manager_type::NumPart, PartManager, and walltime_measure.

Referenced by fof_fof(), metal_return(), and turn_on_quasars().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_treeallocate()

ForceTree force_treeallocate ( int64_t  maxnodes,
int64_t  maxpart,
DomainDecomp ddecomp 
)

This function allocates the memory used for storage of the tree and of auxiliary arrays needed for tree-walk and link-lists. Usually, maxnodes approximately equal to 0.7*maxpart is sufficient to store the tree for up to maxpart particles.

Definition at line 1381 of file forcetree.c.

1382 {
1383  ForceTree tb;
1384 
1385  tb.Father = (int *) mymalloc("Father", maxpart * sizeof(int));
1386  tb.nfather = maxpart;
1387 #ifdef DEBUG
1388  memset(tb.Father, -1, maxpart * sizeof(int));
1389 #endif
1390  tb.Nodes_base = (struct NODE *) mymalloc("Nodes_base", (maxnodes + 1) * sizeof(struct NODE));
1391 #ifdef DEBUG
1392  memset(tb.Nodes_base, -1, (maxnodes + 1) * sizeof(struct NODE));
1393 #endif
1394  tb.firstnode = maxpart;
1395  tb.lastnode = maxpart + maxnodes;
1396  if(maxpart + maxnodes >= 1L<<30)
1397  endrun(5, "Size of tree overflowed for maxpart = %ld, maxnodes = %ld!\n", maxpart, maxnodes);
1398  tb.numnodes = 0;
1399  tb.Nodes = tb.Nodes_base - maxpart;
1400  tb.tree_allocated_flag = 1;
1401  tb.NTopLeaves = ddecomp->NTopLeaves;
1402  tb.TopLeaves = ddecomp->TopLeaves;
1403  return tb;
1404 }
int nfather
Definition: forcetree.h:104
struct topleaf_data * TopLeaves
Definition: forcetree.h:92

References endrun(), ForceTree::Father, ForceTree::firstnode, ForceTree::lastnode, mymalloc, ForceTree::nfather, ForceTree::Nodes, ForceTree::Nodes_base, DomainDecomp::NTopLeaves, ForceTree::NTopLeaves, ForceTree::numnodes, DomainDecomp::TopLeaves, ForceTree::TopLeaves, and ForceTree::tree_allocated_flag.

Referenced by force_tree_build(), test_rebuild_close(), test_rebuild_flat(), and test_rebuild_random().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_treeupdate_pseudos()

void force_treeupdate_pseudos ( int  no,
const ForceTree tree 
)
static

This function updates the top-level tree after the multipole moments of the pseudo-particles have been updated.

Definition at line 1195 of file forcetree.c.

1196 {
1197  int j, p;
1198  MyFloat hmax;
1199  MyFloat s[3], mass;
1200 
1201  mass = 0;
1202  s[0] = 0;
1203  s[1] = 0;
1204  s[2] = 0;
1205  hmax = 0;
1206 
1207  /* This happens if we have a trivial domain with only one entry*/
1208  if(!tree->Nodes[no].f.InternalTopLevel)
1209  return;
1210 
1211  p = tree->Nodes[no].s.suns[0];
1212 
1213  /* since we are dealing with top-level nodes, we know that there are 8 consecutive daughter nodes */
1214  for(j = 0; j < 8; j++)
1215  {
1216  /*This may not happen as we are an internal top level node*/
1217  if(p < tree->firstnode || p >= tree->lastnode)
1218  endrun(6767, "Updating pseudos: %d -> %d which is not an internal node between %d and %d.",no, p, tree->firstnode, tree->lastnode);
1219 #ifdef DEBUG
1220  /* Check we don't move to another part of the tree*/
1221  if(tree->Nodes[p].father != no)
1222  endrun(6767, "Tried to update toplevel node %d with parent %d != expected %d\n", p, tree->Nodes[p].father, no);
1223 #endif
1224 
1225  if(tree->Nodes[p].f.InternalTopLevel)
1226  force_treeupdate_pseudos(p, tree);
1227 
1228  mass += (tree->Nodes[p].mom.mass);
1229  s[0] += (tree->Nodes[p].mom.mass * tree->Nodes[p].mom.cofm[0]);
1230  s[1] += (tree->Nodes[p].mom.mass * tree->Nodes[p].mom.cofm[1]);
1231  s[2] += (tree->Nodes[p].mom.mass * tree->Nodes[p].mom.cofm[2]);
1232 
1233  if(tree->Nodes[p].mom.hmax > hmax)
1234  hmax = tree->Nodes[p].mom.hmax;
1235 
1236  p = tree->Nodes[p].sibling;
1237  }
1238 
1239  if(mass)
1240  {
1241  s[0] /= mass;
1242  s[1] /= mass;
1243  s[2] /= mass;
1244  }
1245  else
1246  {
1247  s[0] = tree->Nodes[no].center[0];
1248  s[1] = tree->Nodes[no].center[1];
1249  s[2] = tree->Nodes[no].center[2];
1250  }
1251 
1252  tree->Nodes[no].mom.cofm[0] = s[0];
1253  tree->Nodes[no].mom.cofm[1] = s[1];
1254  tree->Nodes[no].mom.cofm[2] = s[2];
1255  tree->Nodes[no].mom.mass = mass;
1256 
1257  tree->Nodes[no].mom.hmax = hmax;
1258 }

References NODE::center, NODE::cofm, endrun(), NODE::f, NODE::father, ForceTree::firstnode, NODE::hmax, NODE::InternalTopLevel, ForceTree::lastnode, NODE::mass, NODE::mom, ForceTree::Nodes, NODE::s, NODE::sibling, and NodeChild::suns.

Referenced by force_tree_build().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_update_hmax()

void force_update_hmax ( int *  activeset,
int  size,
ForceTree tree,
DomainDecomp ddecomp 
)

This function updates the hmax-values in tree nodes that hold SPH particles. Since the Hsml-values are potentially changed for active particles in the SPH-density computation, force_update_hmax() should be carried out just before the hydrodynamical SPH forces are computed, i.e. after density().

The purpose of the hmax node is for a symmetric treewalk (currently only the hydro). Particles where P[i].Pos + Hsml pokes beyond the exterior of the tree node may mean that a tree node should be included when it would normally be culled. Therefore we don't really want hmax, we want the maximum amount P[i].Pos + Hsml pokes beyond the tree node.

Definition at line 1271 of file forcetree.c.

1272 {
1273  int NTask, ThisTask, recvTask;
1274  int i, ta;
1275  int *recvcounts, *recvoffset;
1276 
1277  walltime_measure("/Misc");
1278 
1279  /* If hmax has not yet been computed, do all particles*/
1280  if(!tree->hmax_computed_flag) {
1281  activeset = NULL;
1282  size = PartManager->NumPart;
1283  }
1284  MPI_Comm_size(MPI_COMM_WORLD, &NTask);
1285  MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
1286 
1287  #pragma omp parallel for
1288  for(i = 0; i < size; i++)
1289  {
1290  const int p_i = activeset ? activeset[i] : i;
1291 
1292  if(P[p_i].Type != 0 || P[p_i].IsGarbage)
1293  continue;
1294 
1295  int no = tree->Father[p_i];
1296 
1297  while(no >= 0)
1298  {
1299  /* How much does this particle peek beyond this node?
1300  * Note len does not change so we can read it without a lock or atomic. */
1301  MyFloat readhmax, newhmax = 0;
1302  int j, done = 0;
1303  for(j = 0; j < 3; j++) {
1304  /* Compute each direction independently and take the maximum.
1305  * This is the largest possible distance away from node center within a cube bounding hsml.
1306  * Note that because Pos - Center < len, the maximum value this can have is Hsml.*/
1307  newhmax = DMAX(newhmax, fabs(P[p_i].Pos[j] - tree->Nodes[no].center[j]) + P[p_i].Hsml - tree->Nodes[no].len);
1308  }
1309  /* Most particles will lie fully inside a node. No need then for the atomic! */
1310  if(newhmax <= 0)
1311  break;
1312 
1313  #pragma omp atomic read
1314  readhmax = tree->Nodes[no].mom.hmax;
1315  do {
1316  if(newhmax <= readhmax) {
1317  done = 1;
1318  break;
1319  }
1320  /* Swap in the new hmax only if the old one hasn't changed. */
1321  } while(!__atomic_compare_exchange(&(tree->Nodes[no].mom.hmax), &readhmax, &newhmax, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
1322 
1323  if(done)
1324  break;
1325  no = tree->Nodes[no].father;
1326  }
1327  }
1328 
1329  double * TopLeafhmax = (double *) mymalloc("TopLeafMoments", ddecomp->NTopLeaves * sizeof(double));
1330  memset(&TopLeafhmax[0], 0, sizeof(double) * ddecomp->NTopLeaves);
1331 
1332  for(i = ddecomp->Tasks[ThisTask].StartLeaf; i < ddecomp->Tasks[ThisTask].EndLeaf; i ++) {
1333  int no = ddecomp->TopLeaves[i].treenode;
1334  TopLeafhmax[i] = tree->Nodes[no].mom.hmax;
1335  }
1336 
1337  /* share the hmax-data of the dirty nodes accross CPUs */
1338  recvcounts = (int *) mymalloc("recvcounts", sizeof(int) * NTask);
1339  recvoffset = (int *) mymalloc("recvoffset", sizeof(int) * NTask);
1340 
1341  for(recvTask = 0; recvTask < NTask; recvTask++)
1342  {
1343  recvoffset[recvTask] = ddecomp->Tasks[recvTask].StartLeaf;
1344  recvcounts[recvTask] = ddecomp->Tasks[recvTask].EndLeaf - ddecomp->Tasks[recvTask].StartLeaf;
1345  }
1346 
1347  MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
1348  &TopLeafhmax[0], recvcounts, recvoffset,
1349  MPI_DOUBLE, MPI_COMM_WORLD);
1350 
1351  myfree(recvoffset);
1352  myfree(recvcounts);
1353 
1354  for(ta = 0; ta < NTask; ta++) {
1355  if(ta == ThisTask)
1356  continue; /* bypass ThisTask since it is already up to date */
1357  for(i = ddecomp->Tasks[ta].StartLeaf; i < ddecomp->Tasks[ta].EndLeaf; i ++) {
1358  int no = ddecomp->TopLeaves[i].treenode;
1359  tree->Nodes[no].mom.hmax = TopLeafhmax[i];
1360 
1361  while(no >= 0)
1362  {
1363  if(TopLeafhmax[i] <= tree->Nodes[no].mom.hmax)
1364  break;
1365  tree->Nodes[no].mom.hmax = TopLeafhmax[i];
1366  no = tree->Nodes[no].father;
1367  }
1368  }
1369  }
1370  myfree(TopLeafhmax);
1371 
1372  tree->hmax_computed_flag = 1;
1373  walltime_measure("/Tree/HmaxUpdate");
1374 }

References NODE::center, DMAX, task_data::EndLeaf, NODE::father, ForceTree::Father, NODE::hmax, ForceTree::hmax_computed_flag, NODE::len, NODE::mom, myfree, mymalloc, ForceTree::Nodes, NTask, DomainDecomp::NTopLeaves, part_manager_type::NumPart, P, PartManager, task_data::StartLeaf, DomainDecomp::Tasks, ThisTask, DomainDecomp::TopLeaves, topleaf_data::treenode, and walltime_measure.

Referenced by run().

Here is the caller graph for this function:

◆ force_update_node_parallel()

void force_update_node_parallel ( const ForceTree tree,
const DomainDecomp ddecomp 
)

This routine determines the multipole moments for a given internal node and all its subnodes in parallel, assigning the recursive algorithm to different threads using openmp's task api. The result is stored in tb.Nodes in the sequence of this tree-walk.

  • A new task is spawned from each down from each local topleaf. Local topleaves are used so that we do not waste time trying moment calculation with pseudoparticles.
  • Each internal node found at that level is added to a list, together with its sibling.
  • Each node in this list then has the recursive moment calculation called on it. Note: If the tree is very unbalanced and one branch much deeper than the others, this will not be efficient.
  • Once each tree's recursive moment is generated in parallel, the tail value from each recursion is stored, and the node marked as done.
  • A final recursive moment calculation is run in serial for the top 3 levels of the tree. When it encounters one of the pre-computed nodes, it searches the list of pre-computed tail values to set the next node as if it had recursed and continues.

Definition at line 1081 of file forcetree.c.

1082 {
1083  int ThisTask;
1084  MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
1085 
1086 #pragma omp parallel
1087 #pragma omp single nowait
1088  {
1089  int i;
1090  for(i = ddecomp->Tasks[ThisTask].StartLeaf; i < ddecomp->Tasks[ThisTask].EndLeaf; i ++) {
1091  const int no = ddecomp->TopLeaves[i].treenode;
1092  /* Nodes containing other nodes: the overwhelmingly likely case.*/
1093  if(tree->Nodes[no].f.ChildType == NODE_NODE_TYPE) {
1094  #pragma omp task default(none) shared(tree) firstprivate(no)
1095  force_update_node_recursive(no, tree->Nodes[no].sibling, 1, tree);
1096  }
1097  else if(tree->Nodes[no].f.ChildType == PARTICLE_NODE_TYPE)
1098  force_update_particle_node(no, tree);
1099  else if(tree->Nodes[no].f.ChildType == PSEUDO_NODE_TYPE)
1100  endrun(5, "Error, found pseudo node %d but domain entry %d says on task %d\n", no, i, ThisTask);
1101  }
1102  }
1103 }
static void force_update_particle_node(int no, const ForceTree *tree)
Definition: forcetree.c:950
static int force_update_node_recursive(int no, int sib, int level, const ForceTree *tree)
Definition: forcetree.c:982

References NODE::ChildType, endrun(), NODE::f, force_update_node_recursive(), force_update_particle_node(), NODE_NODE_TYPE, ForceTree::Nodes, PARTICLE_NODE_TYPE, PSEUDO_NODE_TYPE, NODE::sibling, task_data::StartLeaf, DomainDecomp::Tasks, ThisTask, DomainDecomp::TopLeaves, and topleaf_data::treenode.

Referenced by do_tree_test(), and force_tree_build().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_update_node_recursive()

static int force_update_node_recursive ( int  no,
int  sib,
int  level,
const ForceTree tree 
)
static

this routine determines the multipole moments for a given internal node and all its subnodes using a recursive computation. The result is stored in tb.Nodes in the sequence of this tree-walk.

The function also computes the NextNode and sibling linked lists. The return value is the current tail of the NextNode linked list.

This function is called recursively using openmp tasks. We spawn a new task for a fixed number of levels of the tree.

Definition at line 982 of file forcetree.c.

983 {
984 #ifdef DEBUG
985  if(tree->Nodes[no].f.ChildType != NODE_NODE_TYPE)
986  endrun(3, "force_update_node_recursive called on node %d of type %d != %d!\n", no, tree->Nodes[no].f.ChildType, NODE_NODE_TYPE);
987 #endif
988  int j;
989  int * suns = tree->Nodes[no].s.suns;
990 
991  int childcnt = 0;
992  /* Remove any empty children, moving the suns array around
993  * so non-empty entries are contiguous at the beginning of the array.
994  * This sharply reduces the size of the tree.
995  * Also count the node children for thread balancing.*/
996  int jj = 0;
997  for(j=0; j < 8; j++, jj++) {
998  /* Never remove empty top-level nodes so we don't
999  * mess up the pseudo-data exchange.
1000  * This may happen for a pseudo particle host or, in very rare cases,
1001  * when one of the local domains is empty. */
1002  while(jj < 8 && !tree->Nodes[suns[jj]].f.TopLevel &&
1003  tree->Nodes[suns[jj]].f.ChildType == PARTICLE_NODE_TYPE &&
1004  tree->Nodes[suns[jj]].s.noccupied == 0) {
1005  jj++;
1006  }
1007  if(jj < 8)
1008  suns[j] = suns[jj];
1009  else
1010  suns[j] = -1;
1011  if(suns[j] >= 0 && tree->Nodes[suns[j]].f.ChildType == NODE_NODE_TYPE)
1012  childcnt++;
1013  }
1014 
1015  /*First do the children*/
1016  for(j = 0; j < 8; j++)
1017  {
1018  int p = suns[j];
1019  /*Empty slot*/
1020  if(p < 0)
1021  continue;
1022  const int nextsib = force_get_sibling(sib, j, suns);
1023  /* This is set in create_nodes but needed because we may remove empty nodes above.*/
1024  tree->Nodes[p].sibling = nextsib;
1025  /* Nodes containing particles or pseudo-particles*/
1026  if(tree->Nodes[p].f.ChildType == PARTICLE_NODE_TYPE)
1027  force_update_particle_node(p, tree);
1028  if(tree->Nodes[p].f.ChildType == NODE_NODE_TYPE) {
1029  /* Don't spawn a new task if we are deep enough that we already spawned a lot.*/
1030  if(childcnt > 1 && level < 512) {
1031  #pragma omp task default(none) shared(level, childcnt, tree) firstprivate(nextsib, p)
1032  force_update_node_recursive(p, nextsib, level*childcnt, tree);
1033  }
1034  else
1035  force_update_node_recursive(p, nextsib, level, tree);
1036  }
1037  }
1038 
1039  /*Make sure all child nodes are done*/
1040  #pragma omp taskwait
1041 
1042  /*Now we do the moments*/
1043  for(j = 0; j < 8; j++)
1044  {
1045  const int p = suns[j];
1046  if(p < 0)
1047  continue;
1048  tree->Nodes[no].mom.mass += (tree->Nodes[p].mom.mass);
1049  tree->Nodes[no].mom.cofm[0] += (tree->Nodes[p].mom.mass * tree->Nodes[p].mom.cofm[0]);
1050  tree->Nodes[no].mom.cofm[1] += (tree->Nodes[p].mom.mass * tree->Nodes[p].mom.cofm[1]);
1051  tree->Nodes[no].mom.cofm[2] += (tree->Nodes[p].mom.mass * tree->Nodes[p].mom.cofm[2]);
1052  if(tree->Nodes[p].mom.hmax > tree->Nodes[no].mom.hmax)
1053  tree->Nodes[no].mom.hmax = tree->Nodes[p].mom.hmax;
1054  }
1055 
1056  /*Set the center of mass moments*/
1057  const double mass = tree->Nodes[no].mom.mass;
1058  /* In principle all the children could be pseudo-particles*/
1059  if(mass > 0) {
1060  tree->Nodes[no].mom.cofm[0] /= mass;
1061  tree->Nodes[no].mom.cofm[1] /= mass;
1062  tree->Nodes[no].mom.cofm[2] /= mass;
1063  }
1064 
1065  return -1;
1066 }
static int force_get_sibling(const int sib, const int j, const int *suns)
Definition: forcetree.c:934

References NODE::ChildType, NODE::cofm, endrun(), NODE::f, force_get_sibling(), force_update_particle_node(), NODE::hmax, NODE::mass, NODE::mom, NodeChild::noccupied, NODE_NODE_TYPE, ForceTree::Nodes, PARTICLE_NODE_TYPE, NODE::s, NODE::sibling, and NodeChild::suns.

Referenced by force_update_node_parallel().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_update_particle_node()

static void force_update_particle_node ( int  no,
const ForceTree tree 
)
static

Definition at line 950 of file forcetree.c.

951 {
952 #ifdef DEBUG
953  if(tree->Nodes[no].f.ChildType != PARTICLE_NODE_TYPE)
954  endrun(3, "force_update_particle_node called on node %d of wrong type!\n", no);
955 #endif
956  int j;
957  /*Set the center of mass moments*/
958  const double mass = tree->Nodes[no].mom.mass;
959  /* Be careful about empty nodes*/
960  if(mass > 0) {
961  for(j = 0; j < 3; j++)
962  tree->Nodes[no].mom.cofm[j] /= mass;
963  }
964  else {
965  for(j = 0; j < 3; j++)
966  tree->Nodes[no].mom.cofm[j] = tree->Nodes[no].center[j];
967  }
968 }

References NODE::center, NODE::ChildType, NODE::cofm, endrun(), NODE::f, NODE::mass, NODE::mom, ForceTree::Nodes, and PARTICLE_NODE_TYPE.

Referenced by force_update_node_parallel(), and force_update_node_recursive().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_freenode()

int get_freenode ( int *  nnext,
struct NodeCache nc 
)

Definition at line 345 of file forcetree.c.

346 {
347  /*Get memory for an extra node from our cache.*/
348  if(nc->nrem_thread < 8) {
351  }
352  const int ninsert = nc->nnext_thread;
353  nc->nnext_thread += 8;
354  nc->nrem_thread -= 8;
355  return ninsert;
356 }
int nrem_thread
Definition: forcetree.c:341
static int atomic_fetch_and_add(int *ptr, int value)
Definition: system.h:78

References atomic_fetch_and_add(), NodeCache::nnext_thread, NODECACHE_SIZE, and NodeCache::nrem_thread.

Referenced by create_new_node_layer().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_subnode()

int get_subnode ( const struct NODE node,
const int  p_i 
)

Definition at line 281 of file forcetree.c.

282 {
283  /*Loop is unrolled to help out the compiler,which normally only manages it at -O3*/
284  return (P[p_i].Pos[0] > node->center[0]) +
285  ((P[p_i].Pos[1] > node->center[1]) << 1) +
286  ((P[p_i].Pos[2] > node->center[2]) << 2);
287 }

References NODE::center, and P.

Referenced by add_particle_to_tree(), and create_new_node_layer().

Here is the caller graph for this function:

◆ init_forcetree_params()

void init_forcetree_params ( const int  FastParticleType)

Definition at line 43 of file forcetree.c.

44 {
45  /* This was increased due to the extra nodes created by subtrees*/
47  ForceTreeParams.FastParticleType = FastParticleType;
48 }
int FastParticleType
Definition: forcetree.c:39

References forcetree_params::FastParticleType, ForceTreeParams, and forcetree_params::TreeAllocFactor.

Referenced by begrun(), setup_density(), setup_tree(), and test_fof().

Here is the caller graph for this function:

◆ init_internal_node()

static void init_internal_node ( struct NODE nfreep,
struct NODE parent,
int  subnode 
)
static

Definition at line 304 of file forcetree.c.

305 {
306  int j;
307  const MyFloat lenhalf = 0.25 * parent->len;
308  nfreep->len = 0.5 * parent->len;
309  nfreep->sibling = -10;
310  nfreep->father = -10;
311  nfreep->f.TopLevel = 0;
312  nfreep->f.InternalTopLevel = 0;
313  nfreep->f.DependsOnLocalMass = 0;
314  nfreep->f.ChildType = PARTICLE_NODE_TYPE;
315  nfreep->f.unused = 0;
316 
317  for(j = 0; j < 3; j++) {
318  /* Detect which quadrant we are in by testing the bits of subnode:
319  * if (subnode & [1,2,4]) is true we add lenhalf, otherwise subtract lenhalf*/
320  const int sign = (subnode & (1 << j)) ? 1 : -1;
321  nfreep->center[j] = parent->center[j] + sign*lenhalf;
322  }
323  for(j = 0; j < NMAXCHILD; j++)
324  nfreep->s.suns[j] = -1;
325  nfreep->s.noccupied = 0;
326  nfreep->s.Types = 0;
327  memset(&(nfreep->mom.cofm),0,3*sizeof(MyFloat));
328  nfreep->mom.mass = 0;
329  nfreep->mom.hmax = 0;
330 }

References NODE::center, NODE::ChildType, NODE::cofm, NODE::DependsOnLocalMass, NODE::f, NODE::father, NODE::hmax, NODE::InternalTopLevel, NODE::len, NODE::mass, NODE::mom, NMAXCHILD, NodeChild::noccupied, PARTICLE_NODE_TYPE, NODE::s, NODE::sibling, NodeChild::suns, NODE::TopLevel, NodeChild::Types, and NODE::unused.

Referenced by create_new_node_layer(), and force_create_node_for_topnode().

Here is the caller graph for this function:

◆ inside_node()

static int inside_node ( const struct NODE node,
const int  p_i 
)
inlinestatic

Definition at line 291 of file forcetree.c.

292 {
293  /*One can also use a loop, but the compiler unrolls it only at -O3,
294  *so this is a little faster*/
295  int inside =
296  (fabs(2*(P[p_i].Pos[0] - node->center[0])) <= node->len) *
297  (fabs(2*(P[p_i].Pos[1] - node->center[1])) <= node->len) *
298  (fabs(2*(P[p_i].Pos[2] - node->center[2])) <= node->len);
299  return inside;
300 }

References NODE::center, NODE::len, and P.

Referenced by force_tree_create_nodes().

Here is the caller graph for this function:

◆ merge_partial_force_trees()

int merge_partial_force_trees ( int  left,
int  right,
struct NodeCache nc,
int *  nnext,
const struct ForceTree  tb,
int  HybridNuGrav 
)

Definition at line 533 of file forcetree.c.

534 {
535  int this_left = left;
536  int this_right = right;
537  const int left_end = tb.Nodes[left].sibling;
538  const int right_end = tb.Nodes[right].sibling;
539 // message(5, "Ends: %d %d\n", left_end, right_end);
540  while(this_left != left_end && this_right != right_end)
541  {
542  if(this_left < tb.firstnode || this_right < tb.firstnode)
543  endrun(10, "Encountered invalid node: %d %d < first %d\n", this_left, this_right, tb.firstnode);
544  struct NODE * nleft = &tb.Nodes[this_left];
545  struct NODE * nright = &tb.Nodes[this_right];
546  if(nc->nnext_thread >= tb.lastnode)
547  return 1;
548 #ifdef DEBUG
549  /* Stop when we reach another topnode*/
550  if((nleft->f.TopLevel && this_left != left) || (nright->f.TopLevel && this_right != right))
551  endrun(6, "Encountered another topnode: left %d == right %d! type %d\n", this_left, this_right, nleft->f.ChildType);
552  if(this_left == this_right)
553  endrun(6, "Odd: left %d == right %d! type %d\n", this_left, this_right, nleft->f.ChildType);
554 // message(1, "left %d right %d\n", this_left, this_right);
555  /* Trees should be synced*/
556  if(fabs(nleft->len / nright->len-1) > 1e-6)
557  endrun(6, "Merge unsynced trees: %d %d len %g %g\n", this_left, this_right, nleft->len, nright->len);
558 #endif
559  /* Two node nodes: keep walking down*/
560  if(nleft->f.ChildType == NODE_NODE_TYPE && nright->f.ChildType == NODE_NODE_TYPE) {
561  if(tb.Nodes[nleft->s.suns[0]].father < 0 || tb.Nodes[nright->s.suns[0]].father < 0)
562  endrun(7, "Walking to nodes (%d %d) from (%d %d) fathers (%d %d)\n",
563  nleft->s.suns[0], nright->s.suns[0], this_left, this_right, tb.Nodes[nleft->s.suns[0]].father, tb.Nodes[nright->s.suns[0]].father);
564  this_left = nleft->s.suns[0];
565  this_right = nright->s.suns[0];
566  continue;
567  }
568  /* If the right node has particles, add them to the left node, go to sibling on right and left.*/
569  else if(nright->f.ChildType == PARTICLE_NODE_TYPE) {
570  int i;
571  for(i = 0; i < nright->s.noccupied; i++) {
572  if(nright->s.suns[i] >= tb.firstnode)
573  endrun(8, "Bad child %d of %d\n", i, nright->s.suns[i], this_right);
574  if(add_particle_to_tree(nright->s.suns[i], this_left, tb, HybridNuGrav, nc, nnext) < 0)
575  return 1;
576  }
577  /* Make sure that nodes which have
578  * this_right as a sibling (there will
579  * be a max of one, as it is a particle node
580  * with no children) point to the replacement
581  * on the left*/
582  /* This condition is checking for the root node, which has no siblings*/
583  if(this_right > right) {
584  /* Find the father, then the next child*/
585  struct NODE * fat = &tb.Nodes[nright->father];
586  /* Find the position of this child in the father*/
587  int sunloc = 0;
588  for(i = 0; i < 8; i++)
589  {
590  if(fat->s.suns[i] == this_right) {
591  sunloc = i;
592  break;
593  }
594  }
595  /* Change the sibling of the child next to this one*/
596  if(sunloc > 0) {
597  if(tb.Nodes[fat->s.suns[i-1]].sibling == this_right)
598  tb.Nodes[fat->s.suns[i-1]].sibling = this_left;
599  }
600  }
601  /* Mark the right node as now invalid*/
602  nright->father = -5;
603  /* Now go to sibling*/
604  this_left = nleft->sibling;
605  this_right = nright->sibling;
606  continue;
607  }
608  /* If the left node has particles, add them to the right node,
609  * then copy the right node over the left node and go to (old) sibling on right and left.*/
610  else if(nleft->f.ChildType == PARTICLE_NODE_TYPE && nright->f.ChildType == NODE_NODE_TYPE) {
611  /* Add the left particles to the right*/
612  int i;
613  for(i = 0; i < nleft->s.noccupied; i++) {
614  if(nleft->s.suns[i] >= tb.firstnode)
615  endrun(8, "Bad child %d of %d\n", i, nleft->s.suns[i], this_left);
616  if(add_particle_to_tree(nleft->s.suns[i], this_right, tb, HybridNuGrav, nc, nnext) < 0)
617  return 1;
618  }
619  /* Copy the right node over the left*/
620  memmove(&nleft->s, &nright->s, sizeof(nleft->s));
621  nleft->f.ChildType = NODE_NODE_TYPE;
622  /* Zero the momenta for the parent*/
623  memset(&nleft->mom, 0, sizeof(nleft->mom));
624  /* Reset children to the new parent:
625  * this assumes nright is a NODE NODE*/
626  for(i = 0; i < 8; i++) {
627  int child = nleft->s.suns[i];
628  tb.Nodes[child].father = this_left;
629  }
630  /* Make sure final child points to the parent's sibling.*/
631 #ifdef DEBUG
632  int oldsib = tb.Nodes[nleft->s.suns[7]].sibling;
633 #endif
634  /* Walk downwards making sure all the children point to the new sibling.
635  * Note also changes last particle node child. */
636  int nn = this_left;
637  while(tb.Nodes[nn].f.ChildType == NODE_NODE_TYPE) {
638  nn = tb.Nodes[nn].s.suns[7];
639 #ifdef DEBUG
640  if(tb.Nodes[nn].sibling != oldsib)
641  endrun(20, "Not the expected sibling %d != %d\n",tb.Nodes[nn].sibling, oldsib);
642 #endif
643  tb.Nodes[nn].sibling = nleft->sibling;
644  }
645  /* Mark the right node as now invalid*/
646  nright->father = -5;
647  /* Next iteration is going to sibling*/
648  this_left = nleft->sibling;
649  this_right = nright->sibling;
650  continue;
651  }
652  else
653  endrun(6, "Nodes %d %d have unexpected type %d %d\n", this_left, this_right, nleft->f.ChildType, nright->f.ChildType);
654  }
655  return 0;
656 }

References add_particle_to_tree(), NODE::ChildType, endrun(), NODE::f, NODE::father, ForceTree::firstnode, ForceTree::lastnode, NODE::len, NODE::mom, NodeCache::nnext_thread, NodeChild::noccupied, NODE_NODE_TYPE, ForceTree::Nodes, PARTICLE_NODE_TYPE, NODE::s, NODE::sibling, NodeChild::suns, and NODE::TopLevel.

Referenced by force_tree_create_nodes().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ modify_internal_node()

static int modify_internal_node ( int  parent,
int  subnode,
int  p_toplace,
const ForceTree  tb,
const int  HybridNuGrav 
)
static

Definition at line 361 of file forcetree.c.

362 {
363  tb.Father[p_toplace] = parent;
364  tb.Nodes[parent].s.suns[subnode] = p_toplace;
365  /* Encode the type in the Types array*/
366  tb.Nodes[parent].s.Types += P[p_toplace].Type << (3*subnode);
367  if(!HybridNuGrav || P[p_toplace].Type != ForceTreeParams.FastParticleType)
368  add_particle_moment_to_node(&tb.Nodes[parent], p_toplace);
369  return 0;
370 }
static void add_particle_moment_to_node(struct NODE *pnode, int i)
Definition: forcetree.c:914

References add_particle_moment_to_node(), forcetree_params::FastParticleType, ForceTree::Father, ForceTreeParams, ForceTree::Nodes, P, NODE::s, NodeChild::suns, and NodeChild::Types.

Referenced by add_particle_to_tree(), and create_new_node_layer().

Here is the call graph for this function:
Here is the caller graph for this function:

Variable Documentation

◆ ForceTreeParams

struct forcetree_params ForceTreeParams
static