MP-Gadget  5.0.1.dev1-76bc7d4726-dirty
Classes | Macros | Typedefs | Functions
forcetree.h File Reference
#include "types.h"
#include "domain.h"
Include dependency graph for forcetree.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  NodeChild
 
struct  NODE
 
struct  ForceTree
 

Macros

#define NMAXCHILD   8
 
#define PARTICLE_NODE_TYPE   0
 
#define NODE_NODE_TYPE   1
 
#define PSEUDO_NODE_TYPE   2
 
#define ALLMASK   (1<<30)-1
 
#define GASMASK   (1)
 
#define DMMASK   (2)
 
#define STARMASK   (1<<4)
 
#define BHMASK   (1<<5)
 

Typedefs

typedef struct ForceTree ForceTree
 

Functions

void init_forcetree_params (const int FastParticleType)
 
int force_tree_allocated (const ForceTree *tt)
 
void force_update_hmax (int *activeset, int size, ForceTree *tt, DomainDecomp *ddecomp)
 
void force_tree_rebuild (ForceTree *tree, DomainDecomp *ddecomp, const int HybridNuGrav, const int DoMoments, const char *EmergencyOutputDir)
 
void force_tree_rebuild_mask (ForceTree *tree, DomainDecomp *ddecomp, int mask, const int HybridNuGrav, const char *EmergencyOutputDir)
 
void force_tree_free (ForceTree *tt)
 
void dump_particles (void)
 
static int node_is_pseudo_particle (int no, const ForceTree *tree)
 
static int node_is_particle (int no, const ForceTree *tree)
 
static int node_is_node (int no, const ForceTree *tree)
 
int force_get_father (int no, const ForceTree *tt)
 
int force_tree_create_nodes (const ForceTree tb, const int npart, int mask, DomainDecomp *ddecomp, const int HybridNuGrav)
 
ForceTree force_treeallocate (int64_t maxnodes, int64_t maxpart, DomainDecomp *ddecomp)
 
void force_update_node_parallel (const ForceTree *tree, const DomainDecomp *ddecomp)
 

Macro Definition Documentation

◆ ALLMASK

#define ALLMASK   (1<<30)-1

Definition at line 20 of file forcetree.h.

◆ BHMASK

#define BHMASK   (1<<5)

Definition at line 24 of file forcetree.h.

◆ DMMASK

#define DMMASK   (2)

Definition at line 22 of file forcetree.h.

◆ GASMASK

#define GASMASK   (1)

Definition at line 21 of file forcetree.h.

◆ NMAXCHILD

#define NMAXCHILD   8

Definition at line 12 of file forcetree.h.

◆ NODE_NODE_TYPE

#define NODE_NODE_TYPE   1

Definition at line 16 of file forcetree.h.

◆ PARTICLE_NODE_TYPE

#define PARTICLE_NODE_TYPE   0

Definition at line 15 of file forcetree.h.

◆ PSEUDO_NODE_TYPE

#define PSEUDO_NODE_TYPE   2

Definition at line 17 of file forcetree.h.

◆ STARMASK

#define STARMASK   (1<<4)

Definition at line 23 of file forcetree.h.

Typedef Documentation

◆ ForceTree

typedef struct ForceTree ForceTree

Function Documentation

◆ dump_particles()

void dump_particles ( void  )

◆ force_get_father()

int force_get_father ( int  no,
const ForceTree tt 
)

Definition at line 905 of file forcetree.c.

906 {
907  if(no >= tree->firstnode)
908  return tree->Nodes[no].father;
909  else
910  return tree->Father[no];
911 }

References NODE::father, ForceTree::Father, ForceTree::firstnode, and ForceTree::Nodes.

Referenced by _prepare(), check_moments(), check_tree(), effhsml(), force_tree_eh_slots_fork(), ngb_treefind_threads(), set_init_hsml(), and setup_smoothinglengths().

Here is the caller graph for this function:

◆ force_tree_allocated()

int force_tree_allocated ( const ForceTree tt)

Definition at line 134 of file forcetree.c.

135 {
136  return tree->tree_allocated_flag;
137 }

References ForceTree::tree_allocated_flag.

Referenced by _prepare(), force_tree_free(), force_tree_rebuild(), force_tree_rebuild_mask(), sfr_reserve_slots(), and treewalk_run().

Here is the caller graph for this function:

◆ force_tree_create_nodes()

int force_tree_create_nodes ( const ForceTree  tb,
const int  npart,
int  mask,
DomainDecomp ddecomp,
const int  HybridNuGrav 
)

Does initial creation of the nodes for the gravitational oct-tree. mask is a bitfield: Only types whose bit is set are added.

Definition at line 661 of file forcetree.c.

662 {
663  int nnext = tb.firstnode; /* index of first free node */
664 
665  /* create an empty root node */
666  {
667  int i;
668  struct NODE *nfreep = &tb.Nodes[nnext]; /* select first node */
669 
670  nfreep->len = PartManager->BoxSize*1.001;
671  for(i = 0; i < 3; i++)
672  nfreep->center[i] = PartManager->BoxSize/2.;
673  for(i = 0; i < NMAXCHILD; i++)
674  nfreep->s.suns[i] = -1;
675  nfreep->s.Types = 0;
676  nfreep->s.noccupied = 0;
677  nfreep->father = -1;
678  nfreep->sibling = -1;
679  nfreep->f.TopLevel = 1;
680  nfreep->f.InternalTopLevel = 0;
681  nfreep->f.DependsOnLocalMass = 0;
682  nfreep->f.ChildType = PARTICLE_NODE_TYPE;
683  nfreep->f.unused = 0;
684  memset(&(nfreep->mom.cofm),0,3*sizeof(MyFloat));
685  nfreep->mom.mass = 0;
686  nfreep->mom.hmax = 0;
687  nnext++;
688  /* create a set of empty nodes corresponding to the top-level ddecomp
689  * grid. We need to generate these nodes first to make sure that we have a
690  * complete top-level tree which allows the easy insertion of the
691  * pseudo-particles in the right place */
692  force_create_node_for_topnode(tb.firstnode, 0, tb.Nodes, ddecomp, 1, 0, 0, 0, &nnext, tb.lastnode);
693  }
694  /* Set up thread-local copies of the topnodes to anchor the subtrees. */
695  int ThisTask, j, t;
696  MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
697  const int StartLeaf = ddecomp->Tasks[ThisTask].StartLeaf;
698  const int EndLeaf = ddecomp->Tasks[ThisTask].EndLeaf;
699  const int nthr = omp_get_max_threads();
700  int * topnodes = ta_malloc("topnodes", int, (EndLeaf - StartLeaf) * nthr);
701  /* Topnodes for each thread. For tid 0, just use the real tree. Saves copying the tree back.*/
702  for(j = 0; j < EndLeaf - StartLeaf; j++)
703  topnodes[j] = ddecomp->TopLeaves[j + StartLeaf].treenode;
704  /* Other threads need a copy*/
705  for(t = 1; t < nthr; t++) {
706  for(j = 0; j < EndLeaf - StartLeaf; j++) {
707  /* Make a local copy*/
708  topnodes[j + t * (EndLeaf - StartLeaf)] = nnext;
709  memmove(&tb.Nodes[nnext], &tb.Nodes[topnodes[j]], sizeof(struct NODE));
710  nnext++;
711  }
712  }
713 
714 /* double tstart = second(); */
715  const int first_free = nnext;
716  /* Increment nnext for the threads we are about to initialise.*/
717  nnext += NODECACHE_SIZE * nthr;
718  /* now we insert all particles */
719  #pragma omp parallel
720  {
721  int i;
722  /* Local topnodes*/
723  int tid = omp_get_thread_num();
724  const int * const local_topnodes = topnodes + tid * (EndLeaf - StartLeaf);
725 
726  /* This implements a small thread-local free Node cache.
727  * The cache ensures that Nodes from the same (or close) particles
728  * are created close to each other on the Node list and thus
729  * helps cache locality. I tried each thread getting a separate
730  * part of the tree, and it wasted too much memory. */
731  struct NodeCache nc;
732  nc.nnext_thread = first_free + tid * NODECACHE_SIZE;
733  nc.nrem_thread = NODECACHE_SIZE;
734 
735  /* Stores the last-seen node on this thread.
736  * Since most particles are close to each other, this should save a number of tree walks.*/
737  int this_acc = local_topnodes[0];
738  // message(1, "Topnodes %d real %d\n", local_topnodes[0], topnodes[0]);
739 
740  /* The default schedule is static with a chunk 1/4 the total.
741  * However, particles are sorted by type and then by peano order.
742  * Since we need to merge trees, it is advantageous to have all particles
743  * spatially close be processed by the same thread. This means that the threads should
744  * process particles at a constant offset from the start of the type.
745  * We do this with a static schedule. */
746  int chnksz = PartManager->NumPart/nthr;
747  if(SlotsManager->info[0].enabled && SlotsManager->info[0].size > 0)
748  chnksz = SlotsManager->info[0].size/nthr;
749  if(chnksz < 1000)
750  chnksz = 1000;
751  #pragma omp for schedule(static, chnksz)
752  for(i = 0; i < npart; i++)
753  {
754  /*Can't break from openmp for*/
755  if(nc.nnext_thread >= tb.lastnode)
756  continue;
757 
758  /* Do not add types that do not have their mask bit set.*/
759  if(!((1<<P[i].Type) & mask)) {
760  continue;
761  }
762  /* Do not add garbage/swallowed particles to the tree*/
763  if(P[i].IsGarbage || (P[i].Swallowed && P[i].Type==5))
764  continue;
765 
766  if(P[i].Mass == 0)
767  endrun(12, "Zero mass particle %d type %d id %ld pos %g %g %g\n", i, P[i].Type, P[i].ID, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]);
768  /*First find the Node for the TopLeaf */
769  int cur;
770  if(inside_node(&tb.Nodes[this_acc], i)) {
771  cur = this_acc;
772  } else {
773  /* Get the topnode to which a particle belongs. Each local tree
774  * has a local set of treenodes copying the global topnodes, except tid 0
775  * which has the real topnodes.*/
776  const int topleaf = domain_get_topleaf(P[i].Key, ddecomp);
777  //int treenode = ddecomp->TopLeaves[topleaf].treenode;
778  cur = local_topnodes[topleaf - StartLeaf];
779  }
780 
781  this_acc = add_particle_to_tree(i, cur, tb, HybridNuGrav, &nc, &nnext);
782  }
783  /* The implicit omp-barrier is important here!*/
784 /* double tend = second(); */
785 /* message(0, "Initial insertion: %.3g ms. First node %d\n", (tend - tstart)*1000, local_topnodes[0]); */
786 
787  /* Merge each topnode separately, using a for loop.
788  * This wastes threads if NTHREAD > NTOPNODES, but it
789  * means only one merge is done per subtree and
790  * it requires no locking.*/
791  #pragma omp for schedule(static, 1)
792  for(i = 0; i < EndLeaf - StartLeaf; i++) {
793  int t;
794  /* These are the addresses of the real topnodes*/
795  const int target = topnodes[i];
796  if(nc.nnext_thread >= tb.lastnode)
797  continue;
798  for(t = 1; t < nthr; t++) {
799  const int righttop = topnodes[i + t * (EndLeaf - StartLeaf)];
800 // message(1, "tid = %d i = %d t = %d Merging %d to %d addresses are %lx - %lx end is %lx\n", omp_get_thread_num(), i, t, righttop, target, &tb.Nodes[righttop], &tb.Nodes[target], &tb.Nodes[nnext]);
801  if(merge_partial_force_trees(target, righttop, &nc, &nnext, tb, HybridNuGrav))
802  break;
803  }
804  }
805  }
806 
807  ta_free(topnodes);
808  return nnext - tb.firstnode;
809 }
static int domain_get_topleaf(const peano_t key, const DomainDecomp *ddecomp)
Definition: domain.h:74
void endrun(int where, const char *fmt,...)
Definition: endrun.c:147
int merge_partial_force_trees(int left, int right, struct NodeCache *nc, int *nnext, const struct ForceTree tb, int HybridNuGrav)
Definition: forcetree.c:533
static void force_create_node_for_topnode(int no, int topnode, struct NODE *Nodes, const DomainDecomp *ddecomp, int bits, int x, int y, int z, int *nextfree, const int lastnode)
Definition: forcetree.c:818
int add_particle_to_tree(int i, int cur_start, const ForceTree tb, const int HybridNuGrav, struct NodeCache *nc, int *nnext)
Definition: forcetree.c:487
static int inside_node(const struct NODE *node, const int p_i)
Definition: forcetree.c:291
#define NODECACHE_SIZE
Definition: forcetree.c:336
#define PARTICLE_NODE_TYPE
Definition: forcetree.h:15
#define NMAXCHILD
Definition: forcetree.h:12
#define ta_malloc(name, type, nele)
Definition: mymalloc.h:25
#define ta_free(p)
Definition: mymalloc.h:28
struct part_manager_type PartManager[1]
Definition: partmanager.c:11
#define P
Definition: partmanager.h:88
struct slots_manager_type SlotsManager[1]
Definition: slotsmanager.c:7
struct task_data * Tasks
Definition: domain.h:41
struct topleaf_data * TopLeaves
Definition: domain.h:37
struct NODE * Nodes
Definition: forcetree.h:97
int firstnode
Definition: forcetree.h:84
int lastnode
Definition: forcetree.h:86
Definition: forcetree.h:39
int sibling
Definition: forcetree.h:40
struct NODE::@9 mom
unsigned int DependsOnLocalMass
Definition: forcetree.h:48
MyFloat center[3]
Definition: forcetree.h:43
unsigned int ChildType
Definition: forcetree.h:49
MyFloat mass
Definition: forcetree.h:56
unsigned int unused
Definition: forcetree.h:51
MyFloat hmax
Definition: forcetree.h:57
int father
Definition: forcetree.h:41
MyFloat cofm[3]
Definition: forcetree.h:55
unsigned int TopLevel
Definition: forcetree.h:47
struct NodeChild s
Definition: forcetree.h:66
unsigned int InternalTopLevel
Definition: forcetree.h:46
struct NODE::@8 f
MyFloat len
Definition: forcetree.h:42
int nnext_thread
Definition: forcetree.c:340
int noccupied
Definition: forcetree.h:35
int suns[NMAXCHILD]
Definition: forcetree.h:32
unsigned int Types
Definition: forcetree.h:30
int64_t size
Definition: slotsmanager.h:12
struct slot_info info[6]
Definition: slotsmanager.h:112
int EndLeaf
Definition: domain.h:30
int StartLeaf
Definition: domain.h:29
int treenode
Definition: domain.h:24
int ThisTask
Definition: test_exchange.c:23
LOW_PRECISION MyFloat
Definition: types.h:19

References add_particle_to_tree(), part_manager_type::BoxSize, NODE::center, NODE::ChildType, NODE::cofm, NODE::DependsOnLocalMass, domain_get_topleaf(), slot_info::enabled, task_data::EndLeaf, endrun(), NODE::f, NODE::father, ForceTree::firstnode, force_create_node_for_topnode(), NODE::hmax, slots_manager_type::info, inside_node(), NODE::InternalTopLevel, ForceTree::lastnode, NODE::len, NODE::mass, merge_partial_force_trees(), NODE::mom, NMAXCHILD, NodeCache::nnext_thread, NodeChild::noccupied, NODECACHE_SIZE, ForceTree::Nodes, NodeCache::nrem_thread, part_manager_type::NumPart, P, PARTICLE_NODE_TYPE, PartManager, NODE::s, NODE::sibling, slot_info::size, SlotsManager, task_data::StartLeaf, NodeChild::suns, ta_free, ta_malloc, DomainDecomp::Tasks, ThisTask, DomainDecomp::TopLeaves, NODE::TopLevel, topleaf_data::treenode, NodeChild::Types, and NODE::unused.

Referenced by do_tree_test(), and force_tree_build().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_tree_free()

void force_tree_free ( ForceTree tree)

This function frees the memory allocated for the tree, i.e. it frees the space allocated by the function force_treeallocate().

Definition at line 1409 of file forcetree.c.

1410 {
1412 
1413  if(!force_tree_allocated(tree))
1414  return;
1415  myfree(tree->Nodes_base);
1416  myfree(tree->Father);
1417  tree->tree_allocated_flag = 0;
1418 }
int event_unlisten(EventSpec *eh, eventfunc func, void *userdata)
Definition: event.c:26
EventSpec EventSlotsFork
Definition: event.c:54
int force_tree_allocated(const ForceTree *tree)
Definition: forcetree.c:134
static int force_tree_eh_slots_fork(EIBase *event, void *userdata)
Definition: forcetree.c:110
#define myfree(x)
Definition: mymalloc.h:19
int tree_allocated_flag
Definition: forcetree.h:78
struct NODE * Nodes_base
Definition: forcetree.h:101
int * Father
Definition: forcetree.h:103

References event_unlisten(), EventSlotsFork, ForceTree::Father, force_tree_allocated(), force_tree_eh_slots_fork(), myfree, ForceTree::Nodes_base, and ForceTree::tree_allocated_flag.

Referenced by _prepare(), do_density_test(), do_force_test(), fof_fof(), force_tree_build(), force_tree_rebuild(), force_tree_rebuild_mask(), metal_return(), run(), run_gravity_test(), runfof(), runpower(), setup_smoothinglengths(), test_rebuild_close(), test_rebuild_flat(), test_rebuild_random(), and turn_on_quasars().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_tree_rebuild()

void force_tree_rebuild ( ForceTree tree,
DomainDecomp ddecomp,
const int  HybridNuGrav,
const int  DoMoments,
const char *  EmergencyOutputDir 
)

Definition at line 140 of file forcetree.c.

141 {
142  MPIU_Barrier(MPI_COMM_WORLD);
143  message(0, "Tree construction. (presently allocated=%g MB)\n", mymalloc_usedbytes() / (1024.0 * 1024.0));
144 
145  if(force_tree_allocated(tree)) {
146  force_tree_free(tree);
147  }
148  walltime_measure("/Misc");
149 
150  *tree = force_tree_build(PartManager->NumPart, ALLMASK, ddecomp, HybridNuGrav, DoMoments, EmergencyOutputDir);
151 
153  walltime_measure("/Tree/Build/Moments");
154 
155  message(0, "Tree constructed (moments: %d). First node %d, number of nodes %d, first pseudo %d. NTopLeaves %d\n",
156  tree->moments_computed_flag, tree->firstnode, tree->numnodes, tree->lastnode, tree->NTopLeaves);
157  MPIU_Barrier(MPI_COMM_WORLD);
158 }
void message(int where, const char *fmt,...)
Definition: endrun.c:175
int event_listen(EventSpec *eh, eventfunc func, void *userdata)
Definition: event.c:6
static ForceTree force_tree_build(int npart, int mask, DomainDecomp *ddecomp, const int HybridNuGrav, const int DoMoments, const char *EmergencyOutputDir)
Definition: forcetree.c:191
void force_tree_free(ForceTree *tree)
Definition: forcetree.c:1409
#define ALLMASK
Definition: forcetree.h:20
#define mymalloc_usedbytes()
Definition: mymalloc.h:32
int NTopLeaves
Definition: forcetree.h:94
int moments_computed_flag
Definition: forcetree.h:82
int numnodes
Definition: forcetree.h:88
#define MPIU_Barrier(comm)
Definition: system.h:103
#define walltime_measure(name)
Definition: walltime.h:8

References ALLMASK, event_listen(), EventSlotsFork, ForceTree::firstnode, force_tree_allocated(), force_tree_build(), force_tree_eh_slots_fork(), force_tree_free(), ForceTree::lastnode, message(), ForceTree::moments_computed_flag, MPIU_Barrier, mymalloc_usedbytes, ForceTree::NTopLeaves, ForceTree::numnodes, part_manager_type::NumPart, PartManager, and walltime_measure.

Referenced by do_density_test(), do_force_test(), run(), run_gravity_test(), runfof(), runpower(), and setup_smoothinglengths().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_tree_rebuild_mask()

void force_tree_rebuild_mask ( ForceTree tree,
DomainDecomp ddecomp,
int  mask,
const int  HybridNuGrav,
const char *  EmergencyOutputDir 
)

Definition at line 161 of file forcetree.c.

162 {
163  MPIU_Barrier(MPI_COMM_WORLD);
164  message(0, "Tree construction for types: %d.\n", mask);
165 
166  if(force_tree_allocated(tree)) {
167  force_tree_free(tree);
168  }
169  walltime_measure("/Misc");
170 
171  /* No moments*/
172  *tree = force_tree_build(PartManager->NumPart, mask, ddecomp, HybridNuGrav, 0, EmergencyOutputDir);
173  walltime_measure("/SPH/Build");
174 
175  message(0, "Tree constructed (type mask: %d). First node %d, number of nodes %d, first pseudo %d. NTopLeaves %d\n",
176  mask, tree->firstnode, tree->numnodes, tree->lastnode, tree->NTopLeaves);
177  MPIU_Barrier(MPI_COMM_WORLD);
178 }

References ForceTree::firstnode, force_tree_allocated(), force_tree_build(), force_tree_free(), ForceTree::lastnode, message(), MPIU_Barrier, ForceTree::NTopLeaves, ForceTree::numnodes, part_manager_type::NumPart, PartManager, and walltime_measure.

Referenced by fof_fof(), metal_return(), and turn_on_quasars().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_treeallocate()

ForceTree force_treeallocate ( int64_t  maxnodes,
int64_t  maxpart,
DomainDecomp ddecomp 
)

This function allocates the memory used for storage of the tree and of auxiliary arrays needed for tree-walk and link-lists. Usually, maxnodes approximately equal to 0.7*maxpart is sufficient to store the tree for up to maxpart particles.

Definition at line 1381 of file forcetree.c.

1382 {
1383  ForceTree tb;
1384 
1385  tb.Father = (int *) mymalloc("Father", maxpart * sizeof(int));
1386  tb.nfather = maxpart;
1387 #ifdef DEBUG
1388  memset(tb.Father, -1, maxpart * sizeof(int));
1389 #endif
1390  tb.Nodes_base = (struct NODE *) mymalloc("Nodes_base", (maxnodes + 1) * sizeof(struct NODE));
1391 #ifdef DEBUG
1392  memset(tb.Nodes_base, -1, (maxnodes + 1) * sizeof(struct NODE));
1393 #endif
1394  tb.firstnode = maxpart;
1395  tb.lastnode = maxpart + maxnodes;
1396  if(maxpart + maxnodes >= 1L<<30)
1397  endrun(5, "Size of tree overflowed for maxpart = %ld, maxnodes = %ld!\n", maxpart, maxnodes);
1398  tb.numnodes = 0;
1399  tb.Nodes = tb.Nodes_base - maxpart;
1400  tb.tree_allocated_flag = 1;
1401  tb.NTopLeaves = ddecomp->NTopLeaves;
1402  tb.TopLeaves = ddecomp->TopLeaves;
1403  return tb;
1404 }
#define mymalloc(name, size)
Definition: mymalloc.h:15
int NTopLeaves
Definition: domain.h:40
int nfather
Definition: forcetree.h:104
struct topleaf_data * TopLeaves
Definition: forcetree.h:92

References endrun(), ForceTree::Father, ForceTree::firstnode, ForceTree::lastnode, mymalloc, ForceTree::nfather, ForceTree::Nodes, ForceTree::Nodes_base, DomainDecomp::NTopLeaves, ForceTree::NTopLeaves, ForceTree::numnodes, DomainDecomp::TopLeaves, ForceTree::TopLeaves, and ForceTree::tree_allocated_flag.

Referenced by force_tree_build(), test_rebuild_close(), test_rebuild_flat(), and test_rebuild_random().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ force_update_hmax()

void force_update_hmax ( int *  activeset,
int  size,
ForceTree tree,
DomainDecomp ddecomp 
)

This function updates the hmax-values in tree nodes that hold SPH particles. Since the Hsml-values are potentially changed for active particles in the SPH-density computation, force_update_hmax() should be carried out just before the hydrodynamical SPH forces are computed, i.e. after density().

The purpose of the hmax node is for a symmetric treewalk (currently only the hydro). Particles where P[i].Pos + Hsml pokes beyond the exterior of the tree node may mean that a tree node should be included when it would normally be culled. Therefore we don't really want hmax, we want the maximum amount P[i].Pos + Hsml pokes beyond the tree node.

Definition at line 1271 of file forcetree.c.

1272 {
1273  int NTask, ThisTask, recvTask;
1274  int i, ta;
1275  int *recvcounts, *recvoffset;
1276 
1277  walltime_measure("/Misc");
1278 
1279  /* If hmax has not yet been computed, do all particles*/
1280  if(!tree->hmax_computed_flag) {
1281  activeset = NULL;
1282  size = PartManager->NumPart;
1283  }
1284  MPI_Comm_size(MPI_COMM_WORLD, &NTask);
1285  MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
1286 
1287  #pragma omp parallel for
1288  for(i = 0; i < size; i++)
1289  {
1290  const int p_i = activeset ? activeset[i] : i;
1291 
1292  if(P[p_i].Type != 0 || P[p_i].IsGarbage)
1293  continue;
1294 
1295  int no = tree->Father[p_i];
1296 
1297  while(no >= 0)
1298  {
1299  /* How much does this particle peek beyond this node?
1300  * Note len does not change so we can read it without a lock or atomic. */
1301  MyFloat readhmax, newhmax = 0;
1302  int j, done = 0;
1303  for(j = 0; j < 3; j++) {
1304  /* Compute each direction independently and take the maximum.
1305  * This is the largest possible distance away from node center within a cube bounding hsml.
1306  * Note that because Pos - Center < len, the maximum value this can have is Hsml.*/
1307  newhmax = DMAX(newhmax, fabs(P[p_i].Pos[j] - tree->Nodes[no].center[j]) + P[p_i].Hsml - tree->Nodes[no].len);
1308  }
1309  /* Most particles will lie fully inside a node. No need then for the atomic! */
1310  if(newhmax <= 0)
1311  break;
1312 
1313  #pragma omp atomic read
1314  readhmax = tree->Nodes[no].mom.hmax;
1315  do {
1316  if(newhmax <= readhmax) {
1317  done = 1;
1318  break;
1319  }
1320  /* Swap in the new hmax only if the old one hasn't changed. */
1321  } while(!__atomic_compare_exchange(&(tree->Nodes[no].mom.hmax), &readhmax, &newhmax, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
1322 
1323  if(done)
1324  break;
1325  no = tree->Nodes[no].father;
1326  }
1327  }
1328 
1329  double * TopLeafhmax = (double *) mymalloc("TopLeafMoments", ddecomp->NTopLeaves * sizeof(double));
1330  memset(&TopLeafhmax[0], 0, sizeof(double) * ddecomp->NTopLeaves);
1331 
1332  for(i = ddecomp->Tasks[ThisTask].StartLeaf; i < ddecomp->Tasks[ThisTask].EndLeaf; i ++) {
1333  int no = ddecomp->TopLeaves[i].treenode;
1334  TopLeafhmax[i] = tree->Nodes[no].mom.hmax;
1335  }
1336 
1337  /* share the hmax-data of the dirty nodes accross CPUs */
1338  recvcounts = (int *) mymalloc("recvcounts", sizeof(int) * NTask);
1339  recvoffset = (int *) mymalloc("recvoffset", sizeof(int) * NTask);
1340 
1341  for(recvTask = 0; recvTask < NTask; recvTask++)
1342  {
1343  recvoffset[recvTask] = ddecomp->Tasks[recvTask].StartLeaf;
1344  recvcounts[recvTask] = ddecomp->Tasks[recvTask].EndLeaf - ddecomp->Tasks[recvTask].StartLeaf;
1345  }
1346 
1347  MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
1348  &TopLeafhmax[0], recvcounts, recvoffset,
1349  MPI_DOUBLE, MPI_COMM_WORLD);
1350 
1351  myfree(recvoffset);
1352  myfree(recvcounts);
1353 
1354  for(ta = 0; ta < NTask; ta++) {
1355  if(ta == ThisTask)
1356  continue; /* bypass ThisTask since it is already up to date */
1357  for(i = ddecomp->Tasks[ta].StartLeaf; i < ddecomp->Tasks[ta].EndLeaf; i ++) {
1358  int no = ddecomp->TopLeaves[i].treenode;
1359  tree->Nodes[no].mom.hmax = TopLeafhmax[i];
1360 
1361  while(no >= 0)
1362  {
1363  if(TopLeafhmax[i] <= tree->Nodes[no].mom.hmax)
1364  break;
1365  tree->Nodes[no].mom.hmax = TopLeafhmax[i];
1366  no = tree->Nodes[no].father;
1367  }
1368  }
1369  }
1370  myfree(TopLeafhmax);
1371 
1372  tree->hmax_computed_flag = 1;
1373  walltime_measure("/Tree/HmaxUpdate");
1374 }
int hmax_computed_flag
Definition: forcetree.h:80
int NTask
Definition: test_exchange.c:23
#define DMAX(x, y)
Definition: test_interp.c:11

References NODE::center, DMAX, task_data::EndLeaf, NODE::father, ForceTree::Father, NODE::hmax, ForceTree::hmax_computed_flag, NODE::len, NODE::mom, myfree, mymalloc, ForceTree::Nodes, NTask, DomainDecomp::NTopLeaves, part_manager_type::NumPart, P, PartManager, task_data::StartLeaf, DomainDecomp::Tasks, ThisTask, DomainDecomp::TopLeaves, topleaf_data::treenode, and walltime_measure.

Referenced by run().

Here is the caller graph for this function:

◆ force_update_node_parallel()

void force_update_node_parallel ( const ForceTree tree,
const DomainDecomp ddecomp 
)

This routine determines the multipole moments for a given internal node and all its subnodes in parallel, assigning the recursive algorithm to different threads using openmp's task api. The result is stored in tb.Nodes in the sequence of this tree-walk.

  • A new task is spawned from each down from each local topleaf. Local topleaves are used so that we do not waste time trying moment calculation with pseudoparticles.
  • Each internal node found at that level is added to a list, together with its sibling.
  • Each node in this list then has the recursive moment calculation called on it. Note: If the tree is very unbalanced and one branch much deeper than the others, this will not be efficient.
  • Once each tree's recursive moment is generated in parallel, the tail value from each recursion is stored, and the node marked as done.
  • A final recursive moment calculation is run in serial for the top 3 levels of the tree. When it encounters one of the pre-computed nodes, it searches the list of pre-computed tail values to set the next node as if it had recursed and continues.

Definition at line 1081 of file forcetree.c.

1082 {
1083  int ThisTask;
1084  MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
1085 
1086 #pragma omp parallel
1087 #pragma omp single nowait
1088  {
1089  int i;
1090  for(i = ddecomp->Tasks[ThisTask].StartLeaf; i < ddecomp->Tasks[ThisTask].EndLeaf; i ++) {
1091  const int no = ddecomp->TopLeaves[i].treenode;
1092  /* Nodes containing other nodes: the overwhelmingly likely case.*/
1093  if(tree->Nodes[no].f.ChildType == NODE_NODE_TYPE) {
1094  #pragma omp task default(none) shared(tree) firstprivate(no)
1095  force_update_node_recursive(no, tree->Nodes[no].sibling, 1, tree);
1096  }
1097  else if(tree->Nodes[no].f.ChildType == PARTICLE_NODE_TYPE)
1098  force_update_particle_node(no, tree);
1099  else if(tree->Nodes[no].f.ChildType == PSEUDO_NODE_TYPE)
1100  endrun(5, "Error, found pseudo node %d but domain entry %d says on task %d\n", no, i, ThisTask);
1101  }
1102  }
1103 }
static void force_update_particle_node(int no, const ForceTree *tree)
Definition: forcetree.c:950
static int force_update_node_recursive(int no, int sib, int level, const ForceTree *tree)
Definition: forcetree.c:982
#define NODE_NODE_TYPE
Definition: forcetree.h:16
#define PSEUDO_NODE_TYPE
Definition: forcetree.h:17

References NODE::ChildType, endrun(), NODE::f, force_update_node_recursive(), force_update_particle_node(), NODE_NODE_TYPE, ForceTree::Nodes, PARTICLE_NODE_TYPE, PSEUDO_NODE_TYPE, NODE::sibling, task_data::StartLeaf, DomainDecomp::Tasks, ThisTask, DomainDecomp::TopLeaves, and topleaf_data::treenode.

Referenced by do_tree_test(), and force_tree_build().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ init_forcetree_params()

void init_forcetree_params ( const int  FastParticleType)

Definition at line 43 of file forcetree.c.

44 {
45  /* This was increased due to the extra nodes created by subtrees*/
47  ForceTreeParams.FastParticleType = FastParticleType;
48 }
static struct forcetree_params ForceTreeParams
double TreeAllocFactor
Definition: forcetree.c:37
int FastParticleType
Definition: forcetree.c:39

References forcetree_params::FastParticleType, ForceTreeParams, and forcetree_params::TreeAllocFactor.

Referenced by begrun(), setup_density(), setup_tree(), and test_fof().

Here is the caller graph for this function:

◆ node_is_node()

static int node_is_node ( int  no,
const ForceTree tree 
)
inlinestatic

Definition at line 145 of file forcetree.h.

146 {
147  return (no >= tree->firstnode) && (no < tree->firstnode + tree->numnodes);
148 }

References ForceTree::firstnode, and ForceTree::numnodes.

◆ node_is_particle()

static int node_is_particle ( int  no,
const ForceTree tree 
)
inlinestatic

Definition at line 139 of file forcetree.h.

140 {
141  return no >= 0 && no < tree->firstnode;
142 }

References ForceTree::firstnode.

Referenced by ngb_treefind_threads().

Here is the caller graph for this function:

◆ node_is_pseudo_particle()

static int node_is_pseudo_particle ( int  no,
const ForceTree tree 
)
inlinestatic

Definition at line 133 of file forcetree.h.

134 {
135  return no >= tree->lastnode;
136 }

References ForceTree::lastnode.

Referenced by ngb_treefind_threads().

Here is the caller graph for this function: