MP-Gadget  5.0.1.dev1-76bc7d4726-dirty
Classes | Macros | Functions | Variables
treewalk.c File Reference
#include <mpi.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <omp.h>
#include "utils.h"
#include "treewalk.h"
#include "partmanager.h"
#include "domain.h"
#include "forcetree.h"
#include <signal.h>
Include dependency graph for treewalk.c:

Go to the source code of this file.

Classes

struct  SendRecvBuffer
 
struct  TreeWalkThreadLocals
 
struct  data_nodelist
 
struct  data_index
 

Macros

#define BREAKPOINT   raise(SIGTRAP)
 
#define FACT1   0.366025403785 /* FACT1 = 0.5 * (sqrt(3)-1) */
 
#define WATCH
 

Functions

void set_treewalk_params (ParameterSet *ps)
 
static void ev_init_thread (const struct TreeWalkThreadLocals local_exports, TreeWalk *const tw, LocalTreeWalk *lv)
 
static void ev_begin (TreeWalk *tw, int *active_set, const size_t size)
 
static void ev_finish (TreeWalk *tw)
 
static void ev_primary (TreeWalk *tw)
 
static struct SendRecvBuffer ev_get_remote (TreeWalk *tw)
 
static void ev_secondary (TreeWalk *tw)
 
static void ev_reduce_result (const struct SendRecvBuffer sndrcv, TreeWalk *tw)
 
static int ev_ndone (TreeWalk *tw)
 
static int ngb_treefind_threads (TreeWalkQueryBase *I, TreeWalkResultBase *O, TreeWalkNgbIterBase *iter, int startnode, LocalTreeWalk *lv)
 
static int data_index_compare (const void *a, const void *b)
 
static struct TreeWalkThreadLocals ev_alloc_threadlocals (TreeWalk *tw, const int NTask, const int NumThreads)
 
static void ev_free_threadlocals (struct TreeWalkThreadLocals local_exports)
 
static void treewalk_init_query (TreeWalk *tw, TreeWalkQueryBase *query, int i, int *NodeList)
 
static void treewalk_init_result (TreeWalk *tw, TreeWalkResultBase *result, TreeWalkQueryBase *query)
 
static void treewalk_reduce_result (TreeWalk *tw, TreeWalkResultBase *result, int i, enum TreeWalkReduceMode mode)
 
static int real_ev (struct TreeWalkThreadLocals local_exports, TreeWalk *tw, size_t *dataindexoffset, size_t *nexports, int *currentIndex)
 
void treewalk_build_queue (TreeWalk *tw, int *active_set, const size_t size, int may_have_garbage)
 
int treewalk_export_particle (LocalTreeWalk *lv, int no)
 
void treewalk_run (TreeWalk *tw, int *active_set, size_t size)
 
static void ev_communicate (void *sendbuf, void *recvbuf, size_t elsize, const struct SendRecvBuffer sndrcv, int import)
 
static int data_index_compare_by_index (const void *a, const void *b)
 
int treewalk_visit_ngbiter (TreeWalkQueryBase *I, TreeWalkResultBase *O, LocalTreeWalk *lv)
 
static int cull_node (const TreeWalkQueryBase *const I, const TreeWalkNgbIterBase *const iter, const struct NODE *const current, const double BoxSize)
 
int treewalk_visit_nolist_ngbiter (TreeWalkQueryBase *I, TreeWalkResultBase *O, LocalTreeWalk *lv)
 
void treewalk_do_hsml_loop (TreeWalk *tw, int *queue, int64_t queuesize, int update_hsml)
 
double ngb_narrow_down (double *right, double *left, const double *radius, const double *numNgb, int maxcmpt, int desnumngb, int *closeidx, double BoxSize)
 

Variables

static int ImportBufferBoost
 
static struct data_nodelistDataNodeList
 
static struct data_indexDataIndexTable
 
static TreeWalkGDB_current_ev = NULL
 

Macro Definition Documentation

◆ BREAKPOINT

#define BREAKPOINT   raise(SIGTRAP)

Definition at line 15 of file treewalk.c.

◆ FACT1

#define FACT1   0.366025403785 /* FACT1 = 0.5 * (sqrt(3)-1) */

Definition at line 17 of file treewalk.c.

◆ WATCH

#define WATCH
Value:
{ \
printf("tw->WorkSet[0] = %d (%d) %s:%d\n", tw->WorkSet ? tw->WorkSet[0] : 0, tw->WorkSetSize, __FILE__, __LINE__); \
}

Definition at line 112 of file treewalk.c.

Function Documentation

◆ cull_node()

static int cull_node ( const TreeWalkQueryBase *const  I,
const TreeWalkNgbIterBase *const  iter,
const struct NODE *const  current,
const double  BoxSize 
)
static

Cull a node.

Returns 1 if the node shall be opened; Returns 0 if the node has no business with this query.

Definition at line 1011 of file treewalk.c.

1012 {
1013  double dist;
1014  if(iter->symmetric == NGB_TREEFIND_SYMMETRIC) {
1015  dist = DMAX(current->mom.hmax, iter->Hsml) + 0.5 * current->len;
1016  } else {
1017  dist = iter->Hsml + 0.5 * current->len;
1018  }
1019 
1020  double r2 = 0;
1021  double dx = 0;
1022  /* do each direction */
1023  int d;
1024  for(d = 0; d < 3; d ++) {
1025  dx = NEAREST(current->center[d] - I->Pos[d], BoxSize);
1026  if(dx > dist) return 0;
1027  if(dx < -dist) return 0;
1028  r2 += dx * dx;
1029  }
1030  /* now test against the minimal sphere enclosing everything */
1031  dist += FACT1 * current->len;
1032 
1033  if(r2 > dist * dist) {
1034  return 0;
1035  }
1036  return 1;
1037 }
#define NEAREST(x, BoxSize)
Definition: partmanager.h:99
struct NODE::@9 mom
MyFloat center[3]
Definition: forcetree.h:43
MyFloat hmax
Definition: forcetree.h:57
MyFloat len
Definition: forcetree.h:42
enum NgbTreeFindSymmetric symmetric
Definition: treewalk.h:37
double Pos[3]
Definition: treewalk.h:23
#define DMAX(x, y)
Definition: test_interp.c:11
#define FACT1
Definition: treewalk.c:17
@ NGB_TREEFIND_SYMMETRIC
Definition: treewalk.h:11

References NODE::center, DMAX, FACT1, NODE::hmax, TreeWalkNgbIterBase::Hsml, NODE::len, NODE::mom, NEAREST, NGB_TREEFIND_SYMMETRIC, TreeWalkQueryBase::Pos, and TreeWalkNgbIterBase::symmetric.

Referenced by ngb_treefind_threads(), and treewalk_visit_nolist_ngbiter().

Here is the caller graph for this function:

◆ data_index_compare()

int data_index_compare ( const void *  a,
const void *  b 
)
static

This function is used as a comparison kernel in a sort routine. It is used to group particles in the communication buffer that are going to be sent to the same CPU.

Definition at line 85 of file treewalk.c.

86 {
87  if(((struct data_index *) a)->Task < (((struct data_index *) b)->Task))
88  return -1;
89 
90  if(((struct data_index *) a)->Task > (((struct data_index *) b)->Task))
91  return +1;
92 
93  if(((struct data_index *) a)->Index < (((struct data_index *) b)->Index))
94  return -1;
95 
96  if(((struct data_index *) a)->Index > (((struct data_index *) b)->Index))
97  return +1;
98 
99  if(((struct data_index *) a)->IndexGet < (((struct data_index *) b)->IndexGet))
100  return -1;
101 
102  if(((struct data_index *) a)->IndexGet > (((struct data_index *) b)->IndexGet))
103  return +1;
104 
105  return 0;
106 }

◆ data_index_compare_by_index()

static int data_index_compare_by_index ( const void *  a,
const void *  b 
)
static

Definition at line 793 of file treewalk.c.

794 {
795  if(((struct data_index *) a)->Index < (((struct data_index *) b)->Index))
796  return -1;
797 
798  if(((struct data_index *) a)->Index > (((struct data_index *) b)->Index))
799  return +1;
800 
801  if(((struct data_index *) a)->IndexGet < (((struct data_index *) b)->IndexGet))
802  return -1;
803 
804  if(((struct data_index *) a)->IndexGet > (((struct data_index *) b)->IndexGet))
805  return +1;
806 
807  return 0;
808 }

Referenced by ev_reduce_result().

Here is the caller graph for this function:

◆ ev_alloc_threadlocals()

static struct TreeWalkThreadLocals ev_alloc_threadlocals ( TreeWalk tw,
const int  NTask,
const int  NumThreads 
)
static

Definition at line 118 of file treewalk.c.

145 {
146  struct TreeWalkThreadLocals local_exports = {0};
147  local_exports.Exportflag = ta_malloc2("Exportthreads", int, 2*NTask*NumThreads);
148  local_exports.Exportnodecount = local_exports.Exportflag + NTask*NumThreads;
149  local_exports.Exportindex = ta_malloc2("Exportindex", size_t, NTask*NumThreads);
150  return local_exports;
151 }
#define ta_malloc2(name, type, nele)
Definition: mymalloc.h:26
size_t * Exportindex
Definition: treewalk.c:32
int NTask
Definition: test_exchange.c:23

References LocalTreeWalk::BunchSize, TreeWalk::BunchSize, LocalTreeWalk::DataIndexOffset, TreeWalkThreadLocals::Exportflag, LocalTreeWalk::exportflag, TreeWalkThreadLocals::Exportindex, LocalTreeWalk::exportindex, TreeWalkThreadLocals::Exportnodecount, LocalTreeWalk::exportnodecount, LocalTreeWalk::Nexport, LocalTreeWalk::ngblist, TreeWalk::Ngblist, LocalTreeWalk::Ninteractions, LocalTreeWalk::Nlist, LocalTreeWalk::Nnodesinlist, NTask, TreeWalk::NTask, part_manager_type::NumPart, PartManager, and LocalTreeWalk::tw.

Referenced by ev_primary(), and ev_secondary().

Here is the caller graph for this function:

◆ ev_begin()

static void ev_begin ( TreeWalk tw,
int *  active_set,
const size_t  size 
)
static

Definition at line 161 of file treewalk.c.

162 {
163  /* Needs to be 64-bit so that the multiplication in Ngblist malloc doesn't overflow*/
164  const size_t NumThreads = omp_get_max_threads();
165  MPI_Comm_size(MPI_COMM_WORLD, &tw->NTask);
166  /* The last argument is may_have_garbage: in practice the only
167  * trivial haswork is the gravtree, which has no (active) garbage because
168  * the active list was just rebuilt. If we ever add a trivial haswork after
169  * sfr/bh we should change this*/
170  treewalk_build_queue(tw, active_set, size, 0);
171 
172  /* Print some balance numbers*/
173  int64_t nmin, nmax, total;
174  MPI_Reduce(&tw->WorkSetSize, &nmin, 1, MPI_INT64, MPI_MIN, 0, MPI_COMM_WORLD);
175  MPI_Reduce(&tw->WorkSetSize, &nmax, 1, MPI_INT64, MPI_MAX, 0, MPI_COMM_WORLD);
176  MPI_Reduce(&tw->WorkSetSize, &total, 1, MPI_INT64, MPI_SUM, 0, MPI_COMM_WORLD);
177  message(0, "Treewalk %s iter %d: total part %ld max/MPI: %ld min/MPI: %ld balance: %g.\n",
178  tw->ev_label, tw->Niteration, total, nmax, nmin, (double)nmax/((total+0.001)/tw->NTask));
179 
180  /* Start first iteration at the beginning*/
181  tw->WorkSetStart = 0;
182 
183  if(!tw->NoNgblist)
184  tw->Ngblist = (int*) mymalloc("Ngblist", PartManager->NumPart * NumThreads * sizeof(int));
185  else
186  tw->Ngblist = NULL;
187 
189 
190  /* Assert that the query and result structures are aligned to 64-bit boundary,
191  * so that our MPI Send/Recv's happen from aligned memory.*/
192  if(tw->query_type_elsize % 8 != 0)
193  endrun(0, "Query structure has size %d, not aligned to 64-bit boundary.\n", tw->query_type_elsize);
194  if(tw->result_type_elsize % 8 != 0)
195  endrun(0, "Result structure has size %d, not aligned to 64-bit boundary.\n", tw->result_type_elsize);
196 
197  /*The amount of memory eventually allocated per tree buffer*/
198  size_t bytesperbuffer = sizeof(struct data_index) + sizeof(struct data_nodelist) + tw->query_type_elsize;
199  /*This memory scales like the number of imports. In principle this could be much larger than Nexport
200  * if the tree is very imbalanced and many processors all need to export to this one. In practice I have
201  * not seen this happen, but provide a parameter to boost the memory for Nimport just in case.*/
202  bytesperbuffer += ImportBufferBoost * (tw->query_type_elsize + tw->result_type_elsize);
203  /*Use all free bytes for the tree buffer, as in exchange. Leave some free memory for array overhead.*/
204  size_t freebytes = mymalloc_freebytes();
205  if(freebytes <= 4096 * 11 * bytesperbuffer) {
206  endrun(1231245, "Not enough memory for exporting any particles: needed %d bytes have %d. \n", bytesperbuffer, freebytes-4096*10);
207  }
208  freebytes -= 4096 * 10 * bytesperbuffer;
209 
210  tw->BunchSize = (size_t) floor(((double)freebytes)/ bytesperbuffer);
211  /* if the send/recv buffer is close to 4GB some MPIs have issues. */
212  const size_t twogb = 1024*1024*3092L;
213  if(tw->BunchSize * tw->query_type_elsize > twogb)
214  tw->BunchSize = twogb / tw->query_type_elsize;
215 
216  if(tw->BunchSize < 100)
217  endrun(2,"Only enough free memory to export %d elements.\n", tw->BunchSize);
218 
220  (struct data_index *) mymalloc("DataIndexTable", tw->BunchSize * sizeof(struct data_index));
221  DataNodeList =
222  (struct data_nodelist *) mymalloc("DataNodeList", tw->BunchSize * sizeof(struct data_nodelist));
223 
224 #ifdef DEBUG
225  memset(DataNodeList, -1, sizeof(struct data_nodelist) * tw->BunchSize);
226 #endif
227 }
void message(int where, const char *fmt,...)
Definition: endrun.c:175
void endrun(int where, const char *fmt,...)
Definition: endrun.c:147
#define mymalloc(name, size)
Definition: mymalloc.h:15
#define report_memory_usage(x)
Definition: mymalloc.h:30
#define mymalloc_freebytes()
Definition: mymalloc.h:31
struct part_manager_type PartManager[1]
Definition: partmanager.c:11
int64_t Niteration
Definition: treewalk.h:142
int64_t WorkSetSize
Definition: treewalk.h:163
size_t BunchSize
Definition: treewalk.h:152
int64_t WorkSetStart
Definition: treewalk.h:159
int NTask
Definition: treewalk.h:106
const char * ev_label
Definition: treewalk.h:91
int NoNgblist
Definition: treewalk.h:156
size_t result_type_elsize
Definition: treewalk.h:96
int * Ngblist
Definition: treewalk.h:154
size_t query_type_elsize
Definition: treewalk.h:95
#define MPI_INT64
Definition: system.h:12
static struct data_nodelist * DataNodeList
void treewalk_build_queue(TreeWalk *tw, int *active_set, const size_t size, int may_have_garbage)
Definition: treewalk.c:394
static struct data_index * DataIndexTable
static int ImportBufferBoost
Definition: treewalk.c:36

References TreeWalk::BunchSize, DataIndexTable, DataNodeList, endrun(), TreeWalk::ev_label, ImportBufferBoost, message(), MPI_INT64, mymalloc, mymalloc_freebytes, TreeWalk::Ngblist, TreeWalk::Niteration, TreeWalk::NoNgblist, TreeWalk::NTask, part_manager_type::NumPart, PartManager, TreeWalk::query_type_elsize, report_memory_usage, TreeWalk::result_type_elsize, treewalk_build_queue(), TreeWalk::WorkSetSize, and TreeWalk::WorkSetStart.

Referenced by treewalk_run().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ev_communicate()

static void ev_communicate ( void *  sendbuf,
void *  recvbuf,
size_t  elsize,
const struct SendRecvBuffer  sndrcv,
int  import 
)
static

Definition at line 692 of file treewalk.c.

692  {
693  /* if import is 1, import the results from neigbhours */
694  MPI_Datatype type;
695  MPI_Type_contiguous(elsize, MPI_BYTE, &type);
696  MPI_Type_commit(&type);
697 
698  if(import) {
700  sendbuf, sndrcv.Recv_count, sndrcv.Recv_offset, type,
701  recvbuf, sndrcv.Send_count, sndrcv.Send_offset, type, MPI_COMM_WORLD);
702  } else {
704  sendbuf, sndrcv.Send_count, sndrcv.Send_offset, type,
705  recvbuf, sndrcv.Recv_count, sndrcv.Recv_offset, type, MPI_COMM_WORLD);
706  }
707  MPI_Type_free(&type);
708 }
int * Send_count
Definition: treewalk.c:23
int * Recv_offset
Definition: treewalk.c:24
int * Send_offset
Definition: treewalk.c:23
int * Recv_count
Definition: treewalk.c:24
int MPI_Alltoallv_sparse(void *sendbuf, int *sendcnts, int *sdispls, MPI_Datatype sendtype, void *recvbuf, int *recvcnts, int *rdispls, MPI_Datatype recvtype, MPI_Comm comm)
Definition: system.c:353

References MPI_Alltoallv_sparse(), SendRecvBuffer::Recv_count, SendRecvBuffer::Recv_offset, SendRecvBuffer::Send_count, and SendRecvBuffer::Send_offset.

Referenced by ev_reduce_result().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ev_finish()

static void ev_finish ( TreeWalk tw)
static

Definition at line 229 of file treewalk.c.

230 {
233  if(tw->Ngblist)
234  myfree(tw->Ngblist);
236  myfree(tw->WorkSet);
237 
238 }
#define myfree(x)
Definition: mymalloc.h:19
int work_set_stolen_from_active
Definition: treewalk.h:165
int * WorkSet
Definition: treewalk.h:161

References DataIndexTable, DataNodeList, myfree, TreeWalk::Ngblist, TreeWalk::work_set_stolen_from_active, and TreeWalk::WorkSet.

Referenced by treewalk_run().

Here is the caller graph for this function:

◆ ev_free_threadlocals()

static void ev_free_threadlocals ( struct TreeWalkThreadLocals  local_exports)
static

Definition at line 154 of file treewalk.c.

155 {
156  ta_free(local_exports.Exportindex);
157  ta_free(local_exports.Exportflag);
158 }
#define ta_free(p)
Definition: mymalloc.h:28

References TreeWalkThreadLocals::Exportflag, TreeWalkThreadLocals::Exportindex, and ta_free.

Referenced by ev_primary(), and ev_secondary().

Here is the caller graph for this function:

◆ ev_get_remote()

static struct SendRecvBuffer ev_get_remote ( TreeWalk tw)
static

Definition at line 692 of file treewalk.c.

712 {
713  /* Can I avoid doing this sort?*/
715  int NTask = tw->NTask;
716  size_t i;
717  double tstart, tend;
718 
719  struct SendRecvBuffer sndrcv = {0};
720  sndrcv.Send_count = (int *) ta_malloc("Send_count", int, 4*NTask+1);
721  sndrcv.Recv_count = sndrcv.Send_count + NTask+1;
722  sndrcv.Send_offset = sndrcv.Send_count + 2*NTask+1;
723  sndrcv.Recv_offset = sndrcv.Send_count + 3*NTask+1;
724 
725  /* Fill the communication layouts */
726  /* Use the last element of SendCount to store the partially exported particles.*/
727  memset(sndrcv.Send_count, 0, sizeof(int)*(NTask+1));
728  for(i = 0; i < tw->Nexport; i++) {
729  sndrcv.Send_count[DataIndexTable[i].Task]++;
730  }
731  tw->Nexport -= sndrcv.Send_count[NTask];
732 
733  tstart = second();
734  MPI_Alltoall(sndrcv.Send_count, 1, MPI_INT, sndrcv.Recv_count, 1, MPI_INT, MPI_COMM_WORLD);
735  tend = second();
736  tw->timewait1 += timediff(tstart, tend);
737 
738  for(i = 0, tw->Nimport = 0, sndrcv.Recv_offset[0] = 0, sndrcv.Send_offset[0] = 0; i < (size_t) NTask; i++)
739  {
740  tw->Nimport += sndrcv.Recv_count[i];
741 
742  if(i > 0)
743  {
744  sndrcv.Send_offset[i] = sndrcv.Send_offset[i - 1] + sndrcv.Send_count[i - 1];
745  sndrcv.Recv_offset[i] = sndrcv.Recv_offset[i - 1] + sndrcv.Recv_count[i - 1];
746  }
747  }
748 
749  size_t j;
750  void * recvbuf = mymalloc("EvDataGet", tw->Nimport * tw->query_type_elsize);
751  char * sendbuf = (char *) mymalloc("EvDataIn", tw->Nexport * tw->query_type_elsize);
752 
753  tstart = second();
754  /* prepare particle data for export */
755 #pragma omp parallel for
756  for(j = 0; j < tw->Nexport; j++)
757  {
758  int place = DataIndexTable[j].Index;
759  TreeWalkQueryBase * input = (TreeWalkQueryBase*) (sendbuf + j * tw->query_type_elsize);
760  int * nodelist = DataNodeList[DataIndexTable[j].IndexGet].NodeList;
761  treewalk_init_query(tw, input, place, nodelist);
762  }
763  tend = second();
764  tw->timecomp1 += timediff(tstart, tend);
765 
766  tstart = second();
767  ev_communicate(sendbuf, recvbuf, tw->query_type_elsize, sndrcv, 0);
768  tend = second();
769  tw->timecommsumm1 += timediff(tstart, tend);
770  myfree(sendbuf);
771  tw->dataget = (char *) recvbuf;
772 #if 0
773  /* Check nodelist utilisation*/
774  size_t total_full_nodes = 0;
775  size_t nodelist_hist[NODELISTLENGTH] = {0};
776  for(i = 0; i < tw->Nexport; i++) {
777  int j;
778  for(j = 0; j < NODELISTLENGTH; j++)
779  if(DataNodeList[i].NodeList[j] == -1 || j == NODELISTLENGTH-1) {
780  total_full_nodes+=j;
781  if(j == NODELISTLENGTH -1)
782  nodelist_hist[j]++;
783  else
784  nodelist_hist[j-1]++;
785  break;
786  }
787  }
788  message(1, "Avg. node utilisation: %g Nodes %lu %lu %lu %lu %lu %lu %lu %lu\n", (double)total_full_nodes/tw->Nexport,nodelist_hist[0], nodelist_hist[1],nodelist_hist[2], nodelist_hist[3], nodelist_hist[4],nodelist_hist[5], nodelist_hist[6], nodelist_hist[7] );
789 #endif
790  return sndrcv;
791 }
#define ta_malloc(name, type, nele)
Definition: mymalloc.h:25
char * dataget
Definition: treewalk.h:110
double timecommsumm1
Definition: treewalk.h:126
double timecomp1
Definition: treewalk.h:123
size_t Nexport
Definition: treewalk.h:146
double timewait1
Definition: treewalk.h:121
size_t Nimport
Definition: treewalk.h:148
int Index
Definition: treewalk.c:50
int Task
Definition: treewalk.c:49
size_t IndexGet
Definition: treewalk.c:51
int NodeList[NODELISTLENGTH]
Definition: treewalk.c:40
double timediff(double t0, double t1)
Definition: system.c:92
double second(void)
Definition: system.c:83
#define qsort_openmp
Definition: test_exchange.c:14
static int data_index_compare(const void *a, const void *b)
Definition: treewalk.c:85
static void ev_communicate(void *sendbuf, void *recvbuf, size_t elsize, const struct SendRecvBuffer sndrcv, int import)
Definition: treewalk.c:692
static void treewalk_init_query(TreeWalk *tw, TreeWalkQueryBase *query, int i, int *NodeList)
Definition: treewalk.c:243
#define NODELISTLENGTH
Definition: treewalk.h:8

Referenced by treewalk_run().

Here is the caller graph for this function:

◆ ev_init_thread()

static void ev_init_thread ( const struct TreeWalkThreadLocals  local_exports,
TreeWalk *const  tw,
LocalTreeWalk lv 
)
static

Definition at line 118 of file treewalk.c.

119 {
120  const size_t thread_id = omp_get_thread_num();
121  const int NTask = tw->NTask;
122  int j;
123  lv->tw = tw;
124  lv->exportflag = local_exports.Exportflag + thread_id * NTask;
125  lv->exportnodecount = local_exports.Exportnodecount + thread_id * NTask;
126  lv->exportindex = local_exports.Exportindex + thread_id * NTask;
127  lv->Ninteractions = 0;
128  lv->Nnodesinlist = 0;
129  lv->Nlist = 0;
130  lv->Nexport = 0;
131  size_t localbunch = tw->BunchSize/omp_get_max_threads();
132  lv->DataIndexOffset = thread_id * localbunch;
133  lv->BunchSize = localbunch;
134  if(localbunch > tw->BunchSize - thread_id * localbunch)
135  lv->BunchSize = tw->BunchSize - thread_id * localbunch;
136 
137  if(tw->Ngblist)
138  lv->ngblist = tw->Ngblist + thread_id * PartManager->NumPart;
139  for(j = 0; j < NTask; j++)
140  lv->exportflag[j] = -1;
141 }
size_t Nexport
Definition: treewalk.h:53
int * exportnodecount
Definition: treewalk.h:58
size_t BunchSize
Definition: treewalk.h:54
int64_t Nlist
Definition: treewalk.h:65
int * exportflag
Definition: treewalk.h:57
int * ngblist
Definition: treewalk.h:62
int64_t Ninteractions
Definition: treewalk.h:63
size_t DataIndexOffset
Definition: treewalk.h:60
int64_t Nnodesinlist
Definition: treewalk.h:64
size_t * exportindex
Definition: treewalk.h:59
TreeWalk * tw
Definition: treewalk.h:47

Referenced by ev_secondary(), and real_ev().

Here is the caller graph for this function:

◆ ev_ndone()

static int ev_ndone ( TreeWalk tw)
static

Definition at line 513 of file treewalk.c.

514 {
515  int ndone;
516  double tstart, tend;
517  tstart = second();
518  int done = !(tw->BufferFullFlag);
519  MPI_Allreduce(&done, &ndone, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
520  tend = second();
521  tw->timewait2 += timediff(tstart, tend);
522  return ndone;
523 
524 }
int BufferFullFlag
Definition: treewalk.h:150
double timewait2
Definition: treewalk.h:122

References TreeWalk::BufferFullFlag, second(), timediff(), and TreeWalk::timewait2.

Referenced by treewalk_run().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ev_primary()

static void ev_primary ( TreeWalk tw)
static

Definition at line 466 of file treewalk.c.

467 {
468  double tstart, tend;
469  tw->BufferFullFlag = 0;
470  tw->Nexport = 0;
471 
472  tstart = second();
473 
474  struct TreeWalkThreadLocals local_exports = ev_alloc_threadlocals(tw, tw->NTask, tw->NThread);
475  int currentIndex = tw->WorkSetStart;
476  int lastSucceeded = tw->WorkSetSize;
477 
478  size_t * nexports = ta_malloc("localexports", size_t, tw->NThread);
479  size_t * dataindexoffset = ta_malloc("dataindex", size_t, tw->NThread);
480 
481 #pragma omp parallel reduction(min: lastSucceeded)
482  {
483  int tid = omp_get_thread_num();
484  lastSucceeded = real_ev(local_exports, tw, &dataindexoffset[tid], &nexports[tid], &currentIndex);
485  }
486 
487  int64_t i;
488  tw->Nexport = 0;
489 
490  /* Compactify the export queue*/
491  for(i = 0; i < tw->NThread; i++)
492  {
493  /* Only need to move if this thread is not full*/
494  if(tw->Nexport != dataindexoffset[i])
495  memmove(DataIndexTable + tw->Nexport, DataIndexTable + dataindexoffset[i], sizeof(DataIndexTable[0]) * nexports[i]);
496  tw->Nexport += nexports[i];
497  }
498 
499  myfree(dataindexoffset);
500  myfree(nexports);
501  ev_free_threadlocals(local_exports);
502 
503  /* Set the place to start the next iteration. Note that because lastSucceeded
504  * is the minimum entry across all threads, some particles may have their trees partially walked twice locally.
505  * This is fine because we walk the tree to fill up the Ngbiter list.
506  * Only a full Ngbiter list is executed; partial lists are discarded.*/
507  tw->WorkSetStart = lastSucceeded + 1;
508 
509  tend = second();
510  tw->timecomp1 += timediff(tstart, tend);
511 }
int64_t NThread
Definition: treewalk.h:107
static void ev_free_threadlocals(struct TreeWalkThreadLocals local_exports)
Definition: treewalk.c:154
static int real_ev(struct TreeWalkThreadLocals local_exports, TreeWalk *tw, size_t *dataindexoffset, size_t *nexports, int *currentIndex)
Definition: treewalk.c:288
static struct TreeWalkThreadLocals ev_alloc_threadlocals(TreeWalk *tw, const int NTask, const int NumThreads)
Definition: treewalk.c:144

References TreeWalk::BufferFullFlag, DataIndexTable, ev_alloc_threadlocals(), ev_free_threadlocals(), myfree, TreeWalk::Nexport, TreeWalk::NTask, TreeWalk::NThread, real_ev(), second(), ta_malloc, TreeWalk::timecomp1, timediff(), TreeWalk::WorkSetSize, and TreeWalk::WorkSetStart.

Referenced by treewalk_run().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ev_reduce_result()

static void ev_reduce_result ( const struct SendRecvBuffer  sndrcv,
TreeWalk tw 
)
static

Definition at line 810 of file treewalk.c.

811 {
812 
813  int j;
814  double tstart, tend;
815 
816  const int Nexport = tw->Nexport;
817  void * sendbuf = tw->dataresult;
818  char * recvbuf = (char*) mymalloc("EvDataOut",
819  Nexport * tw->result_type_elsize);
820 
821  tstart = second();
822  ev_communicate(sendbuf, recvbuf, tw->result_type_elsize, sndrcv, 1);
823  tend = second();
824  tw->timecommsumm2 += timediff(tstart, tend);
825 
826  tstart = second();
827 
828  for(j = 0; j < Nexport; j++) {
829  DataIndexTable[j].IndexGet = j;
830  }
831 
832  /* mysort is a lie! */
834 
835  int * UniqueOff = (int *) mymalloc("UniqueIndex", sizeof(int) * (Nexport + 1));
836  UniqueOff[0] = 0;
837  int Nunique = 0;
838 
839  for(j = 1; j < Nexport; j++) {
840  if(DataIndexTable[j].Index != DataIndexTable[j-1].Index)
841  UniqueOff[++Nunique] = j;
842  }
843  if(Nexport > 0)
844  UniqueOff[++Nunique] = Nexport;
845 
846  if(tw->reduce != NULL) {
847 #pragma omp parallel for
848  for(j = 0; j < Nunique; j++)
849  {
850  int k;
851  int place = DataIndexTable[UniqueOff[j]].Index;
852  int start = UniqueOff[j];
853  int end = UniqueOff[j + 1];
854  for(k = start; k < end; k++) {
855  int get = DataIndexTable[k].IndexGet;
856  TreeWalkResultBase * output = (TreeWalkResultBase*) (recvbuf + tw->result_type_elsize * get);
857  treewalk_reduce_result(tw, output, place, TREEWALK_GHOSTS);
858  }
859  }
860  }
861  myfree(UniqueOff);
862  tend = second();
863  tw->timecomp1 += timediff(tstart, tend);
864  myfree(recvbuf);
865  myfree(tw->dataresult);
866  myfree(tw->dataget);
867 }
char * dataresult
Definition: treewalk.h:111
TreeWalkReduceResultFunction reduce
Definition: treewalk.h:102
double timecommsumm2
Definition: treewalk.h:127
static void treewalk_reduce_result(TreeWalk *tw, TreeWalkResultBase *result, int i, enum TreeWalkReduceMode mode)
Definition: treewalk.c:278
static int data_index_compare_by_index(const void *a, const void *b)
Definition: treewalk.c:793
@ TREEWALK_GHOSTS
Definition: treewalk.h:17

References data_index_compare_by_index(), TreeWalk::dataget, DataIndexTable, TreeWalk::dataresult, ev_communicate(), data_index::Index, data_index::IndexGet, myfree, mymalloc, TreeWalk::Nexport, qsort_openmp, TreeWalk::reduce, TreeWalk::result_type_elsize, second(), TreeWalk::timecommsumm2, TreeWalk::timecomp1, timediff(), TREEWALK_GHOSTS, and treewalk_reduce_result().

Referenced by treewalk_run().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ev_secondary()

static void ev_secondary ( TreeWalk tw)
static

Definition at line 526 of file treewalk.c.

527 {
528  double tstart, tend;
529 
530  tstart = second();
531  tw->dataresult = (char *) mymalloc("EvDataResult", tw->Nimport * tw->result_type_elsize);
532 
533  struct TreeWalkThreadLocals local_exports = ev_alloc_threadlocals(tw, tw->NTask, tw->NThread);
534  int nnodes = tw->Nnodesinlist;
535  int nlist = tw->Nlist;
536 #pragma omp parallel reduction(+: nnodes) reduction(+: nlist)
537  {
538  size_t j;
539  LocalTreeWalk lv[1];
540 
541  ev_init_thread(local_exports, tw, lv);
542  lv->mode = 1;
543 #pragma omp for
544  for(j = 0; j < tw->Nimport; j++) {
545  TreeWalkQueryBase * input = (TreeWalkQueryBase*) (tw->dataget + j * tw->query_type_elsize);
547  treewalk_init_result(tw, output, input);
548  lv->target = -1;
549  tw->visit(input, output, lv);
550  }
551  nnodes += lv->Nnodesinlist;
552  nlist += lv->Nlist;
553  }
554  tw->Nnodesinlist = nnodes;
555  tw->Nlist = nlist;
556 
557  ev_free_threadlocals(local_exports);
558  tend = second();
559  tw->timecomp2 += timediff(tstart, tend);
560 }
double timecomp2
Definition: treewalk.h:124
int64_t Nlist
Definition: treewalk.h:134
TreeWalkVisitFunction visit
Definition: treewalk.h:99
int64_t Nnodesinlist
Definition: treewalk.h:131
static void ev_init_thread(const struct TreeWalkThreadLocals local_exports, TreeWalk *const tw, LocalTreeWalk *lv)
Definition: treewalk.c:118
static void treewalk_init_result(TreeWalk *tw, TreeWalkResultBase *result, TreeWalkQueryBase *query)
Definition: treewalk.c:269

References TreeWalk::dataget, TreeWalk::dataresult, ev_alloc_threadlocals(), ev_free_threadlocals(), ev_init_thread(), LocalTreeWalk::mode, mymalloc, TreeWalk::Nimport, LocalTreeWalk::Nlist, TreeWalk::Nlist, LocalTreeWalk::Nnodesinlist, TreeWalk::Nnodesinlist, TreeWalk::NTask, TreeWalk::NThread, TreeWalk::query_type_elsize, TreeWalk::result_type_elsize, second(), LocalTreeWalk::target, TreeWalk::timecomp2, timediff(), treewalk_init_result(), and TreeWalk::visit.

Referenced by treewalk_run().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ngb_narrow_down()

double ngb_narrow_down ( double *  right,
double *  left,
const double *  radius,
const double *  numNgb,
int  maxcmpt,
int  desnumngb,
int *  closeidx,
double  BoxSize 
)

Definition at line 1366 of file treewalk.c.

1367 {
1368  int j;
1369  int close = 0;
1370  double ngbdist = fabs(numNgb[0] - desnumngb);
1371  for(j = 1; j < maxcmpt; j++){
1372  double newdist = fabs(numNgb[j] - desnumngb);
1373  if(newdist < ngbdist){
1374  ngbdist = newdist;
1375  close = j;
1376  }
1377  }
1378  if(closeidx)
1379  *closeidx = close;
1380 
1381  for(j = 0; j < maxcmpt; j++){
1382  if(numNgb[j] < desnumngb)
1383  *left = radius[j];
1384  if(numNgb[j] > desnumngb){
1385  *right = radius[j];
1386  break;
1387  }
1388  }
1389 
1390  double hsml = radius[close];
1391 
1392  if(*right > 0.99 * BoxSize){
1393  double dngbdv = 0;
1394  if(maxcmpt > 1 && (radius[maxcmpt-1]>radius[maxcmpt-2]))
1395  dngbdv = (numNgb[maxcmpt-1]-numNgb[maxcmpt-2])/(pow(radius[maxcmpt-1],3) - pow(radius[maxcmpt-2],3));
1396  /* Increase hsml by a maximum factor to avoid madness. We can be fairly aggressive about this factor.*/
1397  double newhsml = 4 * hsml;
1398  if(dngbdv > 0) {
1399  double dngb = (desnumngb - numNgb[maxcmpt-1]);
1400  double newvolume = pow(hsml,3) + dngb / dngbdv;
1401  if(pow(newvolume, 1./3) < newhsml)
1402  newhsml = pow(newvolume, 1./3);
1403  }
1404  hsml = newhsml;
1405  }
1406  if(hsml > *right)
1407  hsml = *right;
1408 
1409  if(*left == 0) {
1410  /* Extrapolate using volume, ie locally constant density*/
1411  double dngbdv = 0;
1412  if(radius[1] > radius[0])
1413  dngbdv = (numNgb[1] - numNgb[0]) / (pow(radius[1],3) - pow(radius[0],3));
1414  /* Derivative is not defined for minimum, so use 0.*/
1415  if(maxcmpt == 1 && radius[0] > 0)
1416  dngbdv = numNgb[0] / pow(radius[0],3);
1417 
1418  if(dngbdv > 0) {
1419  double dngb = desnumngb - numNgb[0];
1420  double newvolume = pow(hsml,3) + dngb / dngbdv;
1421  hsml = pow(newvolume, 1./3);
1422  }
1423  }
1424  if(hsml < *left)
1425  hsml = *left;
1426 
1427  return hsml;
1428 }

Referenced by sfr_wind_weight_postprocess(), and stellar_density_check_neighbours().

Here is the caller graph for this function:

◆ ngb_treefind_threads()

static int ngb_treefind_threads ( TreeWalkQueryBase I,
TreeWalkResultBase O,
TreeWalkNgbIterBase iter,
int  startnode,
LocalTreeWalk lv 
)
static

Definition at line 1052 of file treewalk.c.

1057 {
1058  int no;
1059  int numcand = 0;
1060 
1061  const ForceTree * tree = lv->tw->tree;
1062  const double BoxSize = tree->BoxSize;
1063 
1064  no = startnode;
1065 
1066  while(no >= 0)
1067  {
1068  if(node_is_particle(no, tree)) {
1069  int fat = force_get_father(no, tree);
1070  endrun(12312, "Particles should be added before getting here! no = %d, father = %d (ptype = %d) start=%d mode = %d\n", no, fat, tree->Nodes[fat].f.ChildType, startnode, lv->mode);
1071  }
1072  if(node_is_pseudo_particle(no, tree)) {
1073  int fat = force_get_father(no, tree);
1074  endrun(12312, "Pseudo-Particles should be added before getting here! no = %d, father = %d (ptype = %d)\n", no, fat, tree->Nodes[fat].f.ChildType);
1075  }
1076 
1077  struct NODE *current = &tree->Nodes[no];
1078 
1079  /* When walking exported particles we start from the encompassing top-level node,
1080  * so if we get back to a top-level node again we are done.*/
1081  if(lv->mode == 1) {
1082  /* The first node is always top-level*/
1083  if(current->f.TopLevel && no != startnode) {
1084  /* we reached a top-level node again, which means that we are done with the branch */
1085  break;
1086  }
1087  }
1088 
1089  /* Cull the node */
1090  if(0 == cull_node(I, iter, current, BoxSize)) {
1091  /* in case the node can be discarded */
1092  no = current->sibling;
1093  continue;
1094  }
1095 
1096  /* Node contains relevant particles, add them.*/
1097  if(current->f.ChildType == PARTICLE_NODE_TYPE) {
1098  int i;
1099  int * suns = current->s.suns;
1100  for (i = 0; i < current->s.noccupied; i++) {
1101  /* must be the correct type: compare the
1102  * current type for this subnode extracted
1103  * from the bitfield to the mask.*/
1104  int type = (current->s.Types >> (3*i)) % 8;
1105 
1106  if(!((1<<type) & iter->mask))
1107  continue;
1108 
1109  lv->ngblist[numcand++] = suns[i];
1110  }
1111  /* Move sideways*/
1112  no = current->sibling;
1113  continue;
1114  }
1115  else if(current->f.ChildType == PSEUDO_NODE_TYPE) {
1116  /* pseudo particle */
1117  if(lv->mode == 1) {
1118  endrun(12312, "Secondary for particle %d from node %d found pseudo at %d.\n", lv->target, startnode, current);
1119  } else {
1120  /* Export the pseudo particle*/
1121  if(-1 == treewalk_export_particle(lv, current->s.suns[0]))
1122  return -1;
1123  /* Move sideways*/
1124  no = current->sibling;
1125  continue;
1126  }
1127  }
1128  /* ok, we need to open the node */
1129  no = current->s.suns[0];
1130  }
1131 
1132  return numcand;
1133 }
int force_get_father(int no, const ForceTree *tree)
Definition: forcetree.c:905
#define PARTICLE_NODE_TYPE
Definition: forcetree.h:15
static int node_is_particle(int no, const ForceTree *tree)
Definition: forcetree.h:139
#define PSEUDO_NODE_TYPE
Definition: forcetree.h:17
static int node_is_pseudo_particle(int no, const ForceTree *tree)
Definition: forcetree.h:133
double BoxSize
Definition: forcetree.h:106
struct NODE * Nodes
Definition: forcetree.h:97
Definition: forcetree.h:39
int sibling
Definition: forcetree.h:40
unsigned int ChildType
Definition: forcetree.h:49
unsigned int TopLevel
Definition: forcetree.h:47
struct NodeChild s
Definition: forcetree.h:66
struct NODE::@8 f
int noccupied
Definition: forcetree.h:35
int suns[NMAXCHILD]
Definition: forcetree.h:32
unsigned int Types
Definition: forcetree.h:30
const ForceTree * tree
Definition: treewalk.h:88
static int cull_node(const TreeWalkQueryBase *const I, const TreeWalkNgbIterBase *const iter, const struct NODE *const current, const double BoxSize)
Definition: treewalk.c:1011
int treewalk_export_particle(LocalTreeWalk *lv, int no)
Definition: treewalk.c:567

References ForceTree::BoxSize, NODE::ChildType, cull_node(), endrun(), NODE::f, force_get_father(), TreeWalkNgbIterBase::mask, LocalTreeWalk::mode, LocalTreeWalk::ngblist, NodeChild::noccupied, node_is_particle(), node_is_pseudo_particle(), ForceTree::Nodes, PARTICLE_NODE_TYPE, PSEUDO_NODE_TYPE, NODE::s, NODE::sibling, NodeChild::suns, LocalTreeWalk::target, NODE::TopLevel, TreeWalk::tree, treewalk_export_particle(), LocalTreeWalk::tw, and NodeChild::Types.

Referenced by treewalk_visit_ngbiter().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ real_ev()

static int real_ev ( struct TreeWalkThreadLocals  local_exports,
TreeWalk tw,
size_t *  dataindexoffset,
size_t *  nexports,
int *  currentIndex 
)
static

Definition at line 288 of file treewalk.c.

289 {
290  LocalTreeWalk lv[1];
291  /* Note: exportflag is local to each thread */
292  ev_init_thread(local_exports, tw, lv);
293  lv->mode = 0;
294 
295  /* use old index to recover from a buffer overflow*/;
296  TreeWalkQueryBase * input = (TreeWalkQueryBase *) alloca(tw->query_type_elsize);
297  TreeWalkResultBase * output = (TreeWalkResultBase *) alloca(tw->result_type_elsize);
298 
299  int64_t lastSucceeded = tw->WorkSetStart - 1;
300  /* We must schedule monotonically so that if the export buffer fills up
301  * it is guaranteed that earlier particles are already done.
302  * However, we schedule dynamically so that we have reduced imbalance.
303  * We do not use the openmp dynamic scheduling, but roll our own
304  * so that we can break from the loop if needed.*/
305  int chnk = 0;
306  /* chunk size: 1 and 1000 were slightly (3 percent) slower than 8.
307  * FoF treewalk needs a larger chnksz to avoid contention.*/
308  int chnksz = tw->WorkSetSize / (4*tw->NThread);
309  if(chnksz < 1)
310  chnksz = 1;
311  if(chnksz > 100)
312  chnksz = 100;
313  do {
314  /* Get another chunk from the global queue*/
315  chnk = atomic_fetch_and_add(currentIndex, chnksz);
316  /* This is a hand-rolled version of what openmp dynamic scheduling is doing.*/
317  int end = chnk + chnksz;
318  /* Make sure we do not overflow the loop*/
319  if(end > tw->WorkSetSize)
320  end = tw->WorkSetSize;
321  /* Reduce the chunk size towards the end of the walk*/
322  if((tw->WorkSetSize < end + chnksz * tw->NThread) && chnksz >= 2)
323  chnksz /= 2;
324  int k;
325  for(k = chnk; k < end; k++) {
326  /* Skip already evaluated particles. This is only used if the buffer fills up.*/
327  if(tw->evaluated && tw->evaluated[k])
328  continue;
329 
330  const int i = tw->WorkSet ? tw->WorkSet[k] : k;
331  /* Primary never uses node list */
332  treewalk_init_query(tw, input, i, NULL);
333  treewalk_init_result(tw, output, input);
334  lv->target = i;
335  /* Reset the number of exported particles.*/
336  lv->NThisParticleExport = 0;
337  const int rt = tw->visit(input, output, lv);
338  if(lv->NThisParticleExport > 1000)
339  message(5, "%d exports for particle %d! Odd.\n", lv->NThisParticleExport, k);
340  if(rt < 0) {
341  /* export buffer has filled up, can't do more work.*/
342  break;
343  } else {
345  /* We need lastSucceeded as well as currentIndex so that
346  * if the export buffer fills up in the middle of a
347  * chunk we still get the right answer. Notice it is thread-local*/
348  lastSucceeded = k;
349  if(tw->evaluated)
350  tw->evaluated[k] = 1;
351  }
352  }
353  /* If we filled up, we need to remove the partially evaluated last particle from the export list and leave this loop.*/
354  if(lv->Nexport >= lv->BunchSize) {
355  message(1, "Tree export buffer full with %ld particles. start %ld lastsucceeded: %ld end %d size %ld.\n",
356  lv->Nexport, tw->WorkSetStart, lastSucceeded, end, tw->WorkSetSize);
357  #pragma omp atomic write
358  tw->BufferFullFlag = 1;
359  /* If the above loop finished, we don't need to remove the fully exported particle*/
360  if(lastSucceeded < end) {
361  /* Touch up the DataIndexTable, so that partial particle exports are discarded.
362  * Since this queue is per-thread, it is ordered.*/
363  lv->Nexport-= lv->NThisParticleExport;
364  const int lastreal = tw->WorkSet ? tw->WorkSet[k] : k;
365  /* Index stores tw->target, which is the current particle.*/
366  if(lv->NThisParticleExport > 0 && DataIndexTable[lv->DataIndexOffset + lv->Nexport].Index > lastreal)
367  endrun(5, "Something screwed up in export queue: nexp %ld (local %d) last %d < index %d\n", lv->Nexport,
369  /* Leave this chunking loop.*/
370  }
371  break;
372  }
373  } while(chnk < tw->WorkSetSize);
374 
375  *dataindexoffset = lv->DataIndexOffset;
376  *nexports = lv->Nexport;
377  return lastSucceeded;
378 }
size_t NThisParticleExport
Definition: treewalk.h:56
char * evaluated
Definition: treewalk.h:118
static int atomic_fetch_and_add(int *ptr, int value)
Definition: system.h:78
@ TREEWALK_PRIMARY
Definition: treewalk.h:16

References atomic_fetch_and_add(), TreeWalk::BufferFullFlag, LocalTreeWalk::BunchSize, LocalTreeWalk::DataIndexOffset, DataIndexTable, endrun(), ev_init_thread(), TreeWalk::evaluated, data_index::Index, message(), LocalTreeWalk::mode, LocalTreeWalk::Nexport, LocalTreeWalk::NThisParticleExport, TreeWalk::NThread, TreeWalk::query_type_elsize, TreeWalk::result_type_elsize, LocalTreeWalk::target, treewalk_init_query(), treewalk_init_result(), TREEWALK_PRIMARY, treewalk_reduce_result(), TreeWalk::visit, TreeWalk::WorkSet, TreeWalk::WorkSetSize, and TreeWalk::WorkSetStart.

Referenced by ev_primary().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ set_treewalk_params()

void set_treewalk_params ( ParameterSet ps)

Definition at line 55 of file treewalk.c.

56 {
57  int ThisTask;
58  MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
59  if(ThisTask == 0)
60  ImportBufferBoost = param_get_int(ps, "ImportBufferBoost");
61  MPI_Bcast(&ImportBufferBoost, 1, MPI_INT, 0, MPI_COMM_WORLD);
62 }
int param_get_int(ParameterSet *ps, const char *name)
Definition: paramset.c:368
int ThisTask
Definition: test_exchange.c:23

References ImportBufferBoost, param_get_int(), and ThisTask.

Referenced by read_parameter_file().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ treewalk_build_queue()

void treewalk_build_queue ( TreeWalk tw,
int *  active_set,
const size_t  size,
int  may_have_garbage 
)

Definition at line 394 of file treewalk.c.

395 {
396  tw->NThread = omp_get_max_threads();
397 
398  if(!tw->haswork && !may_have_garbage)
399  {
400  tw->WorkSetSize = size;
401  tw->WorkSet = active_set;
403  return;
404  }
405 
406  /* Since we use a static schedule below we only need size / tw->NThread elements per thread.
407  * Add NThread so that each thread has a little extra space.*/
408  size_t tsize = size / tw->NThread + tw->NThread;
409  /*Watch out: tw->WorkSet may change a few lines later due to the realloc*/
410  tw->WorkSet = (int *) mymalloc("ActiveQueue", tsize * sizeof(int) * tw->NThread);
412  size_t i;
413  size_t nqueue = 0;
414 
415  /*We want a lockless algorithm which preserves the ordering of the particle list.*/
416  size_t *nqthr = ta_malloc("nqthr", size_t, tw->NThread);
417  int **thrqueue = ta_malloc("thrqueue", int *, tw->NThread);
418 
419  gadget_setup_thread_arrays(tw->WorkSet, thrqueue, nqthr, tsize, tw->NThread);
420 
421  /* We enforce schedule static to ensure that each thread executes on contiguous particles.
422  * Note static enforces the monotonic modifier but on OpenMP 5.0 nonmonotonic is the default.
423  * static also ensures that no single thread gets more than tsize elements.*/
424  size_t schedsz = size/tw->NThread+1;
425  #pragma omp parallel for schedule(static, schedsz)
426  for(i=0; i < size; i++)
427  {
428  const int tid = omp_get_thread_num();
429  /*Use raw particle number if active_set is null, otherwise use active_set*/
430  const int p_i = active_set ? active_set[i] : (int) i;
431 
432  /* Skip the garbage particles */
433  if(P[p_i].IsGarbage)
434  continue;
435 
436  if(tw->haswork && !tw->haswork(p_i, tw))
437  continue;
438 #ifdef DEBUG
439  if(nqthr[tid] >= tsize)
440  endrun(5, "tid = %d nqthr = %d, tsize = %d size = %d, tw->Nthread = %d i = %d\n", tid, nqthr[tid], tsize, size, tw->NThread, i);
441 #endif
442  thrqueue[tid][nqthr[tid]] = p_i;
443  nqthr[tid]++;
444  }
445  /*Merge step for the queue.*/
446  nqueue = gadget_compact_thread_arrays(tw->WorkSet, thrqueue, nqthr, tw->NThread);
447  ta_free(thrqueue);
448  ta_free(nqthr);
449  /*Shrink memory*/
450  tw->WorkSet = (int *) myrealloc(tw->WorkSet, sizeof(int) * nqueue);
451 
452 #if 0
453  /* check the uniqueness of the active_set list. This is very slow. */
454  qsort_openmp(tw->WorkSet, nqueue, sizeof(int), cmpint);
455  for(i = 0; i < nqueue - 1; i ++) {
456  if(tw->WorkSet[i] == tw->WorkSet[i+1]) {
457  endrun(8829, "A few particles are twicely active.\n");
458  }
459  }
460 #endif
461  tw->WorkSetSize = nqueue;
462 }
#define myrealloc(ptr, size)
Definition: mymalloc.h:18
#define P
Definition: partmanager.h:88
TreeWalkHasWorkFunction haswork
Definition: treewalk.h:100
void gadget_setup_thread_arrays(int *dest, int *srcs[], size_t sizes[], size_t total_size, int narrays)
Definition: system.c:600
size_t gadget_compact_thread_arrays(int *dest, int *srcs[], size_t sizes[], int narrays)
Definition: system.c:587

References endrun(), gadget_compact_thread_arrays(), gadget_setup_thread_arrays(), TreeWalk::haswork, mymalloc, myrealloc, TreeWalk::NThread, P, qsort_openmp, ta_free, ta_malloc, TreeWalk::work_set_stolen_from_active, TreeWalk::WorkSet, and TreeWalk::WorkSetSize.

Referenced by blackhole(), ev_begin(), and treewalk_do_hsml_loop().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ treewalk_do_hsml_loop()

void treewalk_do_hsml_loop ( TreeWalk tw,
int *  queue,
int64_t  queuesize,
int  update_hsml 
)

Definition at line 1254 of file treewalk.c.

1255 {
1256  int64_t ntot = 0;
1257  int NumThreads = omp_get_max_threads();
1258  tw->NPLeft = ta_malloc("NPLeft", size_t, NumThreads);
1259  tw->NPRedo = ta_malloc("NPRedo", int *, NumThreads);
1260  tw->maxnumngb = ta_malloc("numngb", double, NumThreads);
1261  tw->minnumngb = ta_malloc("numngb2", double, NumThreads);
1262 
1263  /* Build the first queue */
1264  treewalk_build_queue(tw, queue, queuesize, 0);
1265  /* Next call to treewalk_run will over-write these pointers*/
1266  int64_t size = tw->WorkSetSize;
1267  int * ReDoQueue = tw->WorkSet;
1268  /* First queue is allocated low*/
1269  int alloc_high = 0;
1270  /* We don't need to redo the queue generation
1271  * but need to keep track of allocated memory.*/
1272  int orig_queue_alloc = (tw->haswork != NULL);
1273  tw->haswork = NULL;
1274 
1275  /* we will repeat the whole thing for those particles where we didn't find enough neighbours */
1276  do {
1277  /* The RedoQueue needs enough memory to store every workset particle on every thread, because
1278  * we cannot guarantee that the sph particles are evenly spread across threads!*/
1279  int * CurQueue = ReDoQueue;
1280  int i;
1281  for(i = 0; i < NumThreads; i++) {
1282  tw->maxnumngb[i] = 0;
1283  tw->minnumngb[i] = 1e50;
1284  }
1285 
1286  /* The ReDoQueue swaps between high and low allocations so we can have two allocated alternately*/
1287  if(update_hsml) {
1288  if(!alloc_high) {
1289  ReDoQueue = (int *) mymalloc2("ReDoQueue", size * sizeof(int) * NumThreads);
1290  alloc_high = 1;
1291  }
1292  else {
1293  ReDoQueue = (int *) mymalloc("ReDoQueue", size * sizeof(int) * NumThreads);
1294  alloc_high = 0;
1295  }
1296  tw->Redo_thread_alloc = size;
1297  gadget_setup_thread_arrays(ReDoQueue, tw->NPRedo, tw->NPLeft, size, NumThreads);
1298  }
1299  treewalk_run(tw, CurQueue, size);
1300 
1301  /* Now done with the current queue*/
1302  if(orig_queue_alloc || tw->Niteration > 1)
1303  myfree(CurQueue);
1304 
1305  /* We can stop if we are not updating hsml*/
1306  if(!update_hsml)
1307  break;
1308 
1309  /* Set up the next queue*/
1310  size = gadget_compact_thread_arrays(ReDoQueue, tw->NPRedo, tw->NPLeft, NumThreads);
1311 
1312  MPI_Allreduce(&size, &ntot, 1, MPI_INT64, MPI_SUM, MPI_COMM_WORLD);
1313  if(ntot == 0){
1314  myfree(ReDoQueue);
1315  break;
1316  }
1317  for(i = 1; i < NumThreads; i++) {
1318  if(tw->maxnumngb[0] < tw->maxnumngb[i])
1319  tw->maxnumngb[0] = tw->maxnumngb[i];
1320  if(tw->minnumngb[0] > tw->minnumngb[i])
1321  tw->minnumngb[0] = tw->minnumngb[i];
1322  }
1323  double minngb, maxngb;
1324  MPI_Reduce(&tw->maxnumngb[0], &maxngb, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
1325  MPI_Reduce(&tw->minnumngb[0], &minngb, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
1326  message(0, "Max ngb=%g, min ngb=%g\n", maxngb, minngb);
1327 
1328  /*Shrink memory*/
1329  ReDoQueue = (int *) myrealloc(ReDoQueue, sizeof(int) * size);
1330 
1331  /*
1332  if(ntot < 1 ) {
1333  foreach(ActiveParticle)
1334  {
1335  if(density_haswork(i)) {
1336  MyFloat Left = DENSITY_GET_PRIV(tw)->Left[i];
1337  MyFloat Right = DENSITY_GET_PRIV(tw)->Right[i];
1338  message (1, "i=%d task=%d ID=%llu type=%d, Hsml=%g Left=%g Right=%g Ngbs=%g Right-Left=%g\n pos=(%g|%g|%g)\n",
1339  i, ThisTask, P[i].ID, P[i].Type, P[i].Hsml, Left, Right,
1340  (float) P[i].NumNgb, Right - Left, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]);
1341  }
1342  }
1343 
1344  }
1345  */
1346 #ifdef DEBUG
1347  if(ntot == 1 && size > 0 && tw->Niteration > 20 ) {
1348  int pp = ReDoQueue[0];
1349  message(1, "Remaining i=%d, t %d, pos %g %g %g, hsml: %g\n", pp, P[pp].Type, P[pp].Pos[0], P[pp].Pos[1], P[pp].Pos[2], P[pp].Hsml);
1350  }
1351 #endif
1352 
1353  if(size > 0 && tw->Niteration > MAXITER) {
1354  endrun(1155, "failed to converge density for %ld particles\n", ntot);
1355  }
1356  } while(1);
1357  ta_free(tw->minnumngb);
1358  ta_free(tw->maxnumngb);
1359  ta_free(tw->NPRedo);
1360  ta_free(tw->NPLeft);
1361 }
#define MAXITER
Definition: cooling.c:46
#define mymalloc2(name, size)
Definition: mymalloc.h:16
double * minnumngb
Definition: treewalk.h:172
double * maxnumngb
Definition: treewalk.h:171
int ** NPRedo
Definition: treewalk.h:168
size_t Redo_thread_alloc
Definition: treewalk.h:169
size_t * NPLeft
Definition: treewalk.h:167
void treewalk_run(TreeWalk *tw, int *active_set, size_t size)
Definition: treewalk.c:619

References endrun(), gadget_compact_thread_arrays(), gadget_setup_thread_arrays(), TreeWalk::haswork, MAXITER, TreeWalk::maxnumngb, message(), TreeWalk::minnumngb, MPI_INT64, myfree, mymalloc, mymalloc2, myrealloc, TreeWalk::Niteration, TreeWalk::NPLeft, TreeWalk::NPRedo, P, TreeWalk::Redo_thread_alloc, ta_free, ta_malloc, treewalk_build_queue(), treewalk_run(), TreeWalk::WorkSet, and TreeWalk::WorkSetSize.

Referenced by density(), stellar_density(), and winds_find_weights().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ treewalk_export_particle()

int treewalk_export_particle ( LocalTreeWalk lv,
int  no 
)

Definition at line 567 of file treewalk.c.

568 {
569  if(lv->mode != 0) {
570  endrun(1, "Trying to export a ghost particle.\n");
571  }
572  const int target = lv->target;
573  int *exportflag = lv->exportflag;
574  int *exportnodecount = lv->exportnodecount;
575  size_t *exportindex = lv->exportindex;
576  TreeWalk * tw = lv->tw;
577 
578  const int task = tw->tree->TopLeaves[no - tw->tree->lastnode].Task;
579 
580  if(exportflag[task] != target)
581  {
582  exportflag[task] = target;
583  exportnodecount[task] = NODELISTLENGTH;
584  }
585 
586  if(exportnodecount[task] == NODELISTLENGTH)
587  {
588  /* out of buffer space. Need to interrupt. */
589  if(lv->Nexport >= lv->BunchSize) {
590  return -1;
591  }
592  /* This index is a unique entry in the global DataNodeList and DataIndexTable.*/
593  size_t nexp = lv->Nexport + lv->DataIndexOffset;
594  exportnodecount[task] = 0;
595  exportindex[task] = nexp;
596  DataIndexTable[nexp].Task = task;
597  DataIndexTable[nexp].Index = target;
598  DataIndexTable[nexp].IndexGet = nexp;
599  lv->Nexport++;
600  lv->NThisParticleExport++;
601  }
602 
603  /* Set the NodeList entry*/
604  DataNodeList[exportindex[task]].NodeList[exportnodecount[task]++] =
605  tw->tree->TopLeaves[no - tw->tree->lastnode].treenode;
606 
607  if(exportnodecount[task] < NODELISTLENGTH)
608  DataNodeList[exportindex[task]].NodeList[exportnodecount[task]] = -1;
609  return 0;
610 }
struct topleaf_data * TopLeaves
Definition: forcetree.h:92
int lastnode
Definition: forcetree.h:86
int Task
Definition: domain.h:21
int treenode
Definition: domain.h:24

References LocalTreeWalk::BunchSize, LocalTreeWalk::DataIndexOffset, DataIndexTable, DataNodeList, endrun(), LocalTreeWalk::exportflag, LocalTreeWalk::exportindex, LocalTreeWalk::exportnodecount, data_index::Index, data_index::IndexGet, ForceTree::lastnode, LocalTreeWalk::mode, LocalTreeWalk::Nexport, data_nodelist::NodeList, NODELISTLENGTH, LocalTreeWalk::NThisParticleExport, LocalTreeWalk::target, topleaf_data::Task, data_index::Task, ForceTree::TopLeaves, TreeWalk::tree, topleaf_data::treenode, and LocalTreeWalk::tw.

Referenced by force_treeev_shortrange(), ngb_treefind_threads(), and treewalk_visit_nolist_ngbiter().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ treewalk_init_query()

static void treewalk_init_query ( TreeWalk tw,
TreeWalkQueryBase query,
int  i,
int *  NodeList 
)
static

Definition at line 243 of file treewalk.c.

244 {
245 #ifdef DEBUG
246  query->ID = P[i].ID;
247 #endif
248 
249  int d;
250  for(d = 0; d < 3; d ++) {
251  query->Pos[d] = P[i].Pos[d];
252  }
253 
254  if(NodeList) {
255  memcpy(query->NodeList, NodeList, sizeof(int) * NODELISTLENGTH);
256  } else {
257  query->NodeList[0] = tw->tree->firstnode; /* root node */
258  query->NodeList[1] = -1; /* terminate immediately */
259  /* Zero the rest of the nodelist*/
260 #ifdef DEBUG
261  memset(query->NodeList+2, 0, sizeof(int) * (NODELISTLENGTH-2));
262 #endif
263  }
264 
265  tw->fill(i, query, tw);
266 }
int firstnode
Definition: forcetree.h:84
int NodeList[NODELISTLENGTH]
Definition: treewalk.h:27
TreeWalkFillQueryFunction fill
Definition: treewalk.h:101

References TreeWalk::fill, ForceTree::firstnode, data_nodelist::NodeList, TreeWalkQueryBase::NodeList, NODELISTLENGTH, P, TreeWalkQueryBase::Pos, and TreeWalk::tree.

Referenced by real_ev().

Here is the caller graph for this function:

◆ treewalk_init_result()

static void treewalk_init_result ( TreeWalk tw,
TreeWalkResultBase result,
TreeWalkQueryBase query 
)
static

Definition at line 269 of file treewalk.c.

270 {
271  memset(result, 0, tw->result_type_elsize);
272 #ifdef DEBUG
273  result->ID = query->ID;
274 #endif
275 }

References TreeWalk::result_type_elsize.

Referenced by ev_secondary(), and real_ev().

Here is the caller graph for this function:

◆ treewalk_reduce_result()

static void treewalk_reduce_result ( TreeWalk tw,
TreeWalkResultBase result,
int  i,
enum TreeWalkReduceMode  mode 
)
static

Definition at line 278 of file treewalk.c.

279 {
280  if(tw->reduce != NULL)
281  tw->reduce(i, result, mode, tw);
282 #ifdef DEBUG
283  if(P[i].ID != result->ID)
284  endrun(2, "Mismatched ID (%ld != %ld) for particle %d in treewalk reduction, mode %d\n", P[i].ID, result->ID, i, mode);
285 #endif
286 }

References endrun(), P, and TreeWalk::reduce.

Referenced by ev_reduce_result(), and real_ev().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ treewalk_run()

void treewalk_run ( TreeWalk tw,
int *  active_set,
size_t  size 
)

Definition at line 619 of file treewalk.c.

620 {
621  if(!force_tree_allocated(tw->tree)) {
622  endrun(0, "Tree has been freed before this treewalk.\n");
623  }
624 
625  GDB_current_ev = tw;
626 
627  ev_begin(tw, active_set, size);
628 
629  if(tw->preprocess) {
630  int64_t i;
631  #pragma omp parallel for
632  for(i = 0; i < tw->WorkSetSize; i ++) {
633  const int p_i = tw->WorkSet ? tw->WorkSet[i] : i;
634  tw->preprocess(p_i, tw);
635  }
636  }
637 
638  if(tw->visit) {
639  tw->Nexportfull = 0;
640  tw->evaluated = NULL;
641  do
642  {
643  /* Keep track of which particles have been evaluated across buffer fill ups.
644  * Do this if we are not allowed to evaluate anything twice,
645  * or if the buffer filled up already.*/
646  if((!tw->evaluated) && (tw->Nexportfull == 1 || tw->repeatdisallowed)) {
647  tw->evaluated = (char *) mymalloc("evaluated", sizeof(char)*tw->WorkSetSize);
648  memset(tw->evaluated, 0, sizeof(char)*tw->WorkSetSize);
649  }
650  ev_primary(tw); /* do local particles and prepare export list */
651  /* exchange particle data */
652  const struct SendRecvBuffer sndrcv = ev_get_remote(tw);
653  /* now do the particles that were sent to us */
654  ev_secondary(tw);
655 
656  /* import the result to local particles */
657  ev_reduce_result(sndrcv, tw);
658 
659  tw->Nexportfull ++;
660  tw->Nexport_sum += tw->Nexport;
661  ta_free(sndrcv.Send_count);
662  } while(ev_ndone(tw) < tw->NTask);
663  if(tw->evaluated)
664  myfree(tw->evaluated);
665  }
666 
667 #ifdef DEBUG
668  /*int64_t totNodesinlist, totlist;
669  MPI_Reduce(&tw->Nnodesinlist, &totNodesinlist, 1, MPI_INT64, MPI_SUM, 0, MPI_COMM_WORLD);
670  MPI_Reduce(&tw->Nlist, &totlist, 1, MPI_INT64, MPI_SUM, 0, MPI_COMM_WORLD);
671  message(0, "Nodes in nodelist: %g (avg). %ld nodes, %ld lists\n", ((double) totNodesinlist)/totlist, totlist, totNodesinlist);*/
672 #endif
673  double tstart, tend;
674 
675  tstart = second();
676 
677  if(tw->postprocess) {
678  int64_t i;
679  #pragma omp parallel for
680  for(i = 0; i < tw->WorkSetSize; i ++) {
681  const int p_i = tw->WorkSet ? tw->WorkSet[i] : i;
682  tw->postprocess(p_i, tw);
683  }
684  }
685  tend = second();
686  tw->timecomp3 = timediff(tstart, tend);
687  ev_finish(tw);
688  tw->Niteration++;
689 }
int force_tree_allocated(const ForceTree *tree)
Definition: forcetree.c:134
TreeWalkProcessFunction preprocess
Definition: treewalk.h:105
double timecomp3
Definition: treewalk.h:125
int64_t Nexport_sum
Definition: treewalk.h:137
TreeWalkProcessFunction postprocess
Definition: treewalk.h:104
int64_t Nexportfull
Definition: treewalk.h:139
int repeatdisallowed
Definition: treewalk.h:117
static void ev_begin(TreeWalk *tw, int *active_set, const size_t size)
Definition: treewalk.c:161
static void ev_finish(TreeWalk *tw)
Definition: treewalk.c:229
static void ev_reduce_result(const struct SendRecvBuffer sndrcv, TreeWalk *tw)
Definition: treewalk.c:810
static void ev_primary(TreeWalk *tw)
Definition: treewalk.c:466
static void ev_secondary(TreeWalk *tw)
Definition: treewalk.c:526
static TreeWalk * GDB_current_ev
Definition: treewalk.c:115
static struct SendRecvBuffer ev_get_remote(TreeWalk *tw)
Definition: treewalk.c:711
static int ev_ndone(TreeWalk *tw)
Definition: treewalk.c:513

References endrun(), ev_begin(), ev_finish(), ev_get_remote(), ev_ndone(), ev_primary(), ev_reduce_result(), ev_secondary(), TreeWalk::evaluated, force_tree_allocated(), GDB_current_ev, myfree, mymalloc, TreeWalk::Nexport, TreeWalk::Nexport_sum, TreeWalk::Nexportfull, TreeWalk::Niteration, TreeWalk::NTask, TreeWalk::postprocess, TreeWalk::preprocess, TreeWalk::repeatdisallowed, second(), SendRecvBuffer::Send_count, ta_free, TreeWalk::timecomp3, timediff(), TreeWalk::tree, TreeWalk::visit, TreeWalk::WorkSet, and TreeWalk::WorkSetSize.

Referenced by blackhole(), fof_label_primary(), fof_label_secondary(), grav_short_pair(), grav_short_tree(), hydro_force(), ionize_all_part(), metal_return(), treewalk_do_hsml_loop(), and winds_and_feedback().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ treewalk_visit_ngbiter()

int treewalk_visit_ngbiter ( TreeWalkQueryBase I,
TreeWalkResultBase O,
LocalTreeWalk lv 
)

Definition at line 924 of file treewalk.c.

927 {
928 
930 
931  /* Kick-start the iteration with other == -1 */
932  iter->other = -1;
933  lv->tw->ngbiter(I, O, iter, lv);
934 #ifdef DEBUG
935  /* Check whether the tree contains the particles we are looking for*/
936  if((lv->tw->tree->mask & iter->mask) == 0)
937  endrun(5, "Tried to run treewalk for particle mask %d but tree mask is %d.\n", iter->mask, lv->tw->tree->mask);
938 #endif
939  const double BoxSize = lv->tw->tree->BoxSize;
940 
941  int ninteractions = 0;
942  int inode = 0;
943 
944  for(inode = 0; (lv->mode == 0 && inode < 1)|| (lv->mode == 1 && inode < NODELISTLENGTH && I->NodeList[inode] >= 0); inode++)
945  {
946  int numcand = ngb_treefind_threads(I, O, iter, I->NodeList[inode], lv);
947  /* Export buffer is full end prematurally */
948  if(numcand < 0) return numcand;
949 
950  /* If we are here, export is succesful. Work on the this particle -- first
951  * filter out all of the candidates that are actually outside. */
952  int numngb;
953 
954  for(numngb = 0; numngb < numcand; numngb ++) {
955  int other = lv->ngblist[numngb];
956 
957  /* Skip garbage*/
958  if(P[other].IsGarbage)
959  continue;
960  /* In case the type of the particle has changed since the tree was built.
961  * Happens for wind treewalk for gas turned into stars on this timestep.*/
962  if(!((1<<P[other].Type) & iter->mask)) {
963  continue;
964  }
965 
966  double dist;
967 
968  if(iter->symmetric == NGB_TREEFIND_SYMMETRIC) {
969  dist = DMAX(P[other].Hsml, iter->Hsml);
970  } else {
971  dist = iter->Hsml;
972  }
973 
974  double r2 = 0;
975  int d;
976  double h2 = dist * dist;
977  for(d = 0; d < 3; d ++) {
978  /* the distance vector points to 'other' */
979  iter->dist[d] = NEAREST(I->Pos[d] - P[other].Pos[d], BoxSize);
980  r2 += iter->dist[d] * iter->dist[d];
981  if(r2 > h2) break;
982  }
983  if(r2 > h2) continue;
984 
985  /* update the iter and call the iteration function*/
986  iter->r2 = r2;
987  iter->r = sqrt(r2);
988  iter->other = other;
989 
990  lv->tw->ngbiter(I, O, iter, lv);
991  }
992 
993  ninteractions += numngb;
994  }
995 
996  lv->Ninteractions += ninteractions;
997  if(lv->mode == 1) {
998  lv->Nnodesinlist += inode;
999  lv->Nlist += 1;
1000  }
1001  return 0;
1002 }
int mask
Definition: forcetree.h:90
double dist[3]
Definition: treewalk.h:40
size_t ngbiter_type_elsize
Definition: treewalk.h:97
TreeWalkNgbIterFunction ngbiter
Definition: treewalk.h:103
static int ngb_treefind_threads(TreeWalkQueryBase *I, TreeWalkResultBase *O, TreeWalkNgbIterBase *iter, int startnode, LocalTreeWalk *lv)
Definition: treewalk.c:1052

References ForceTree::BoxSize, TreeWalkNgbIterBase::dist, DMAX, endrun(), TreeWalkNgbIterBase::Hsml, ForceTree::mask, TreeWalkNgbIterBase::mask, LocalTreeWalk::mode, NEAREST, NGB_TREEFIND_SYMMETRIC, ngb_treefind_threads(), TreeWalk::ngbiter, TreeWalk::ngbiter_type_elsize, LocalTreeWalk::ngblist, LocalTreeWalk::Ninteractions, LocalTreeWalk::Nlist, LocalTreeWalk::Nnodesinlist, TreeWalkQueryBase::NodeList, TreeWalkNgbIterBase::other, P, TreeWalkQueryBase::Pos, TreeWalkNgbIterBase::r, TreeWalkNgbIterBase::r2, TreeWalkNgbIterBase::symmetric, TreeWalk::tree, and LocalTreeWalk::tw.

Referenced by blackhole(), fof_label_primary(), grav_short_pair(), hydro_force(), ionize_all_part(), and metal_return().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ treewalk_visit_nolist_ngbiter()

int treewalk_visit_nolist_ngbiter ( TreeWalkQueryBase I,
TreeWalkResultBase O,
LocalTreeWalk lv 
)

Definition at line 1143 of file treewalk.c.

1146 {
1148 
1149  /* Kick-start the iteration with other == -1 */
1150  iter->other = -1;
1151  lv->tw->ngbiter(I, O, iter, lv);
1152 
1153  int inode;
1154  for(inode = 0; (lv->mode == 0 && inode < 1)|| (lv->mode == 1 && inode < NODELISTLENGTH && I->NodeList[inode] >= 0); inode++)
1155  {
1156  int no = I->NodeList[inode];
1157  const ForceTree * tree = lv->tw->tree;
1158  const double BoxSize = tree->BoxSize;
1159 
1160  while(no >= 0)
1161  {
1162  struct NODE *current = &tree->Nodes[no];
1163 
1164  /* When walking exported particles we start from the encompassing top-level node,
1165  * so if we get back to a top-level node again we are done.*/
1166  if(lv->mode == 1) {
1167  /* The first node is always top-level*/
1168  if(current->f.TopLevel && no != I->NodeList[inode]) {
1169  /* we reached a top-level node again, which means that we are done with the branch */
1170  break;
1171  }
1172  }
1173 
1174  /* Cull the node */
1175  if(0 == cull_node(I, iter, current, BoxSize)) {
1176  /* in case the node can be discarded */
1177  no = current->sibling;
1178  continue;
1179  }
1180 
1181  /* Node contains relevant particles, add them.*/
1182  if(current->f.ChildType == PARTICLE_NODE_TYPE) {
1183  int i;
1184  int * suns = current->s.suns;
1185  for (i = 0; i < current->s.noccupied; i++) {
1186  /* must be the correct type: compare the
1187  * current type for this subnode extracted
1188  * from the bitfield to the mask.*/
1189  int type = (current->s.Types >> (3*i)) % 8;
1190 
1191  if(!((1<<type) & iter->mask))
1192  continue;
1193 
1194  /* Now evaluate a particle for the list*/
1195  int other = suns[i];
1196  /* Skip garbage*/
1197  if(P[other].IsGarbage)
1198  continue;
1199  /* In case the type of the particle has changed since the tree was built.
1200  * Happens for wind treewalk for gas turned into stars on this timestep.*/
1201  if(!((1<<P[other].Type) & iter->mask))
1202  continue;
1203 
1204  double dist = iter->Hsml;
1205  double r2 = 0;
1206  int d;
1207  double h2 = dist * dist;
1208  for(d = 0; d < 3; d ++) {
1209  /* the distance vector points to 'other' */
1210  iter->dist[d] = NEAREST(I->Pos[d] - P[other].Pos[d], BoxSize);
1211  r2 += iter->dist[d] * iter->dist[d];
1212  if(r2 > h2) break;
1213  }
1214  if(r2 > h2) continue;
1215 
1216  /* update the iter and call the iteration function*/
1217  iter->r2 = r2;
1218  iter->other = other;
1219  iter->r = sqrt(r2);
1220  lv->tw->ngbiter(I, O, iter, lv);
1221  }
1222  /* Move sideways*/
1223  no = current->sibling;
1224  continue;
1225  }
1226  else if(current->f.ChildType == PSEUDO_NODE_TYPE) {
1227  /* pseudo particle */
1228  if(lv->mode == 1) {
1229  endrun(12312, "Secondary for particle %d from node %d found pseudo at %d.\n", lv->target, I->NodeList[inode], current);
1230  } else {
1231  /* Export the pseudo particle*/
1232  if(-1 == treewalk_export_particle(lv, current->s.suns[0]))
1233  return -1;
1234  /* Move sideways*/
1235  no = current->sibling;
1236  continue;
1237  }
1238  }
1239  /* ok, we need to open the node */
1240  no = current->s.suns[0];
1241  }
1242  }
1243 
1244  if(lv->mode == 1) {
1245  lv->Nnodesinlist += inode;
1246  lv->Nlist += 1;
1247  }
1248  return 0;
1249 }

References ForceTree::BoxSize, NODE::ChildType, cull_node(), TreeWalkNgbIterBase::dist, endrun(), NODE::f, TreeWalkNgbIterBase::Hsml, TreeWalkNgbIterBase::mask, LocalTreeWalk::mode, NEAREST, TreeWalk::ngbiter, TreeWalk::ngbiter_type_elsize, LocalTreeWalk::Nlist, LocalTreeWalk::Nnodesinlist, NodeChild::noccupied, TreeWalkQueryBase::NodeList, ForceTree::Nodes, TreeWalkNgbIterBase::other, P, PARTICLE_NODE_TYPE, TreeWalkQueryBase::Pos, PSEUDO_NODE_TYPE, TreeWalkNgbIterBase::r, TreeWalkNgbIterBase::r2, NODE::s, NODE::sibling, NodeChild::suns, LocalTreeWalk::target, NODE::TopLevel, TreeWalk::tree, treewalk_export_particle(), LocalTreeWalk::tw, and NodeChild::Types.

Referenced by density(), fof_label_secondary(), stellar_density(), and winds_find_weights().

Here is the call graph for this function:
Here is the caller graph for this function:

Variable Documentation

◆ DataIndexTable

struct data_index * DataIndexTable
static

◆ DataNodeList

struct data_nodelist * DataNodeList
static

the particles to be exported are grouped by task-number. This table allows the results to be disentangled again and to be assigned to the correct particle

Referenced by ev_begin(), ev_finish(), and treewalk_export_particle().

◆ GDB_current_ev

TreeWalk* GDB_current_ev = NULL
static

Definition at line 115 of file treewalk.c.

Referenced by treewalk_run().

◆ ImportBufferBoost

int ImportBufferBoost
static

Definition at line 36 of file treewalk.c.

Referenced by ev_begin(), and set_treewalk_params().