MP-Gadget  5.0.1.dev1-76bc7d4726-dirty
domain.c File Reference

code for domain decomposition.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <math.h>
#include <stdint.h>
#include <omp.h>
#include "utils.h"
#include "domain.h"
#include "timestep.h"
#include "timebinmgr.h"
#include "exchange.h"
#include "slotsmanager.h"
#include "partmanager.h"
#include "walltime.h"
#include "utils/paramset.h"
#include "utils/peano.h"
#include "utils/mpsort.h"


Classes

struct  DomainDecompositionPolicy
 
struct  local_topnode_data
 
struct  local_particle_data
 
struct  topleaf_extdata
 

Macros

#define TAG_GRAV_A   18
 
#define TAG_GRAV_B   19
 

Functions

void set_domain_par (DomainParams dp)
 
void set_domain_params (ParameterSet *ps)
 
static int order_by_key (const void *a, const void *b)
 
static void mp_order_by_key (const void *data, void *radix, void *arg)
 
static void domain_assign_balanced (DomainDecomp *ddecomp, int64_t *cost, const int NsegmentPerTask)
 
static int domain_allocate (DomainDecomp *ddecomp, DomainDecompositionPolicy *policy)
 
static int domain_check_memory_bound (const DomainDecomp *ddecomp, int64_t *TopLeafWork, int64_t *TopLeafCount)
 
static int domain_attempt_decompose (DomainDecomp *ddecomp, DomainDecompositionPolicy *policy, const int MaxTopNodes)
 
static int domain_balance (DomainDecomp *ddecomp)
 
static int domain_determine_global_toptree (DomainDecompositionPolicy *policy, struct local_topnode_data *topTree, int *topTreeSize, const int MaxTopNodes, MPI_Comm DomainComm)
 
static void domain_compute_costs (const DomainDecomp *ddecomp, int64_t *TopLeafWork, int64_t *TopLeafCount)
 
static void domain_toptree_merge (struct local_topnode_data *treeA, struct local_topnode_data *treeB, int noA, int noB, int *treeASize, const int MaxTopNodes)
 
static int domain_check_for_local_refine_subsample (DomainDecompositionPolicy *policy, struct local_topnode_data *topTree, int *topTreeSize, const int MaxTopNodes)
 
static int domain_global_refine (struct local_topnode_data *topTree, int *topTreeSize, const int MaxTopNodes, int64_t countlimit, int64_t costlimit)
 
static void domain_create_topleaves (DomainDecomp *ddecomp, int no, int *next)
 
static int domain_layoutfunc (int n, const void *userdata)
 
static int domain_policies_init (DomainDecompositionPolicy policies[], const int NincreaseAlloc, const int SwitchToGlobal)
 
void domain_decompose_full (DomainDecomp *ddecomp)
 
void domain_maintain (DomainDecomp *ddecomp, struct DriftData *drift)
 
void domain_free (DomainDecomp *ddecomp)
 
static int topleaf_ext_order_by_task_and_key (const void *c1, const void *c2)
 
static int topleaf_ext_order_by_key (const void *c1, const void *c2)
 
static int domain_toptree_get_subnode (struct local_topnode_data *topTree, peano_t key)
 
static int domain_toptree_insert (struct local_topnode_data *topTree, peano_t Key, int64_t cost)
 
static int domain_toptree_split (struct local_topnode_data *topTree, int *topTreeSize, const int MaxTopNodes, int i)
 
static void domain_toptree_update_cost (struct local_topnode_data *topTree, int start)
 
static void domain_toptree_truncate_r (struct local_topnode_data *topTree, int start, int64_t countlimit, int64_t costlimit)
 
static void domain_toptree_garbage_collection (struct local_topnode_data *topTree, int start, int *last_free)
 
static void domain_toptree_truncate (struct local_topnode_data *topTree, int *topTreeSize, int64_t countlimit, int64_t costlimit)
 
static int domain_nonrecursively_combine_topTree (struct local_topnode_data *topTree, int *topTreeSize, const int MaxTopNodes, MPI_Comm DomainComm)
 

Variables

static DomainParams domain_params
 

Detailed Description

code for domain decomposition

This file contains the code for the domain decomposition of the simulation volume. The domains are constructed from disjoint subsets of the leaves of a fiducial top-level tree that covers the full simulation volume. Domain boundaries hence run along tree-node divisions of a fiducial global BH tree. As a result of this method, the tree force is, in principle, strictly independent of the way the domains are cut. The domain decomposition can be carried out for an arbitrary number of CPUs. Individual domains are not cubical, but they are spatially coherent, since the leaves are traversed in Peano-Hilbert order and individual domains form segments along this order. This also ensures that each domain has a small surface-to-volume ratio, which minimizes communication.
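As an illustration of this segment picture, here is a minimal sketch (not part of domain.c; the helper name print_domain_segments is hypothetical, and it assumes one contiguous segment per task, i.e. NsegmentPerTask = 1) that prints the run of top-level leaves, and hence the stretch of the Peano-Hilbert curve, owned by each task, using only structures documented on this page:

#include "domain.h"
#include "utils.h"

/* Hypothetical helper: print the leaf range and starting keys owned by each
 * task. Tasks[ta].EndLeaf is exclusive, as set up by domain_assign_balanced. */
static void print_domain_segments(const DomainDecomp * ddecomp, int NTask)
{
    int ta;
    for(ta = 0; ta < NTask; ta++) {
        int first = ddecomp->Tasks[ta].StartLeaf;
        int last = ddecomp->Tasks[ta].EndLeaf;
        if(first >= last)
            continue;       /* this task owns no leaves */
        peano_t lo = ddecomp->TopNodes[ddecomp->TopLeaves[first].topnode].StartKey;
        peano_t hi = ddecomp->TopNodes[ddecomp->TopLeaves[last - 1].topnode].StartKey;
        message(0, "Task %d: leaves [%d, %d), keys %lu .. %lu\n",
                ta, first, last, (unsigned long) lo, (unsigned long) hi);
    }
}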

Definition in file domain.c.

Macro Definition Documentation

◆ TAG_GRAV_A

#define TAG_GRAV_A   18

Definition at line 23 of file domain.c.

◆ TAG_GRAV_B

#define TAG_GRAV_B   19

Definition at line 24 of file domain.c.

Function Documentation

◆ domain_allocate()

static int domain_allocate ( DomainDecomp ddecomp,
DomainDecompositionPolicy policy 
)
static

This function allocates all the structures that will be required for the tree construction and walk later on.

Definition at line 285 of file domain.c.

286 {
287  size_t bytes, all_bytes = 0;
288 
289  int MaxTopNodes = (int) (policy->TopNodeAllocFactor * PartManager->MaxPart + 1);
290 
291  /* Build the domain over the global all-processors communicator.
292  * We use a symbol in case we want to do fancy things in the future.*/
293  ddecomp->DomainComm = MPI_COMM_WORLD;
294 
295  int NTask;
296  MPI_Comm_size(ddecomp->DomainComm, &NTask);
297 
298  /* Add a tail item to avoid special treatments */
299  ddecomp->Tasks = (struct task_data *) mymalloc2("Tasks", bytes = ((NTask + 1)* sizeof(ddecomp->Tasks[0])));
300 
301  all_bytes += bytes;
302 
303  ddecomp->TopNodes = (struct topnode_data *) mymalloc("TopNodes",
304  bytes = (MaxTopNodes * (sizeof(ddecomp->TopNodes[0]))));
305 
306  all_bytes += bytes;
307 
308  ddecomp->TopLeaves = (struct topleaf_data *) mymalloc("TopLeaves",
309  bytes = (MaxTopNodes * sizeof(ddecomp->TopLeaves[0])));
310 
311  all_bytes += bytes;
312 
313  message(0, "Allocated %g MByte for top-level domain structure\n", all_bytes / (1024.0 * 1024.0));
314 
315  ddecomp->domain_allocated_flag = 1;
316 
317  return MaxTopNodes;
318 }
void message(int where, const char *fmt,...)
Definition: endrun.c:175
#define mymalloc(name, size)
Definition: mymalloc.h:15
#define mymalloc2(name, size)
Definition: mymalloc.h:16
struct part_manager_type PartManager[1]
Definition: partmanager.c:11
struct topnode_data * TopNodes
Definition: domain.h:36
struct task_data * Tasks
Definition: domain.h:41
MPI_Comm DomainComm
Definition: domain.h:44
struct topleaf_data * TopLeaves
Definition: domain.h:37
int domain_allocated_flag
Definition: domain.h:34
int NTask
Definition: test_exchange.c:23

References DomainDecomp::domain_allocated_flag, DomainDecomp::DomainComm, part_manager_type::MaxPart, message(), mymalloc, mymalloc2, NTask, PartManager, DomainDecomp::Tasks, DomainDecomp::TopLeaves, DomainDecompositionPolicy::TopNodeAllocFactor, and DomainDecomp::TopNodes.

Referenced by domain_decompose_full().


◆ domain_assign_balanced()

static void domain_assign_balanced ( DomainDecomp ddecomp,
int64_t *  cost,
const int  NsegmentPerTask 
)
static

This function assigns TopLeaves to Segments, trying to ensure uniform cost by assigning TopLeaves (contiguously) until a Segment reaches the desired size. Each Segment is then assigned to a Task in a round-robin fashion, starting from the largest Segment, until all Segments are assigned. At the moment the Segment/Task distinction is not used: we call this with NsegmentPerTask = 1, because we are able to create Segments of roughly equal cost from the TopLeaves.

This creates the index in Tasks[Task].StartLeaf and Tasks[Task].EndLeaf. cost is the cost per TopLeaf.
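The greedy segment-building rule can be seen in a small standalone toy (the costs and segment count are made up for illustration): keep appending leaves while the ideal boundary of the current segment is more than half a leaf cost away, then cut:

#include <stdio.h>

int main(void)
{
    long cost[8] = {3, 3, 2, 4, 1, 5, 2, 4};    /* toy per-leaf costs */
    int nleaf = 8, nseg = 4;
    long total = 0;
    int i;
    for(i = 0; i < nleaf; i++)
        total += cost[i];
    double mean = (double) total / nseg;        /* ideal cost per segment */
    long assigned = 0, curload = 0;
    int seg = 0;
    for(i = 0; i < nleaf; i++) {
        /* same test as in domain_assign_balanced: is there room left? */
        if(curload > 0 && mean * (seg + 1) - (assigned + curload) <= 0.5 * cost[i]) {
            printf("segment %d: cost %ld\n", seg, curload);   /* cut here */
            assigned += curload;
            curload = 0;
            seg++;
        }
        curload += cost[i];
    }
    printf("segment %d: cost %ld\n", seg, curload);
    return 0;
}

With these numbers the toy produces four segments of cost 6 each; the real routine additionally forces a cut when the remaining leaves are only just enough for the remaining segments.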

Definition at line 533 of file domain.c.

534 {
535  int NTask;
536  MPI_Comm_size(ddecomp->DomainComm, &NTask);
537 
538  int Nsegment = NTask * NsegmentPerTask;
539  /* we work with TopLeafExt then replace TopLeaves*/
540 
541  struct topleaf_extdata * TopLeafExt;
542 
543  /* A Segment is a subset of the TopLeaf nodes */
544 
545  TopLeafExt = (struct topleaf_extdata *) mymalloc("TopLeafExt", ddecomp->NTopLeaves * sizeof(TopLeafExt[0]));
546 
547  /* copy the data over */
548  int i;
549  for(i = 0; i < ddecomp->NTopLeaves; i ++) {
550  TopLeafExt[i].topnode = ddecomp->TopLeaves[i].topnode;
551  TopLeafExt[i].Key = ddecomp->TopNodes[ddecomp->TopLeaves[i].topnode].StartKey;
552  TopLeafExt[i].Task = -1;
553  TopLeafExt[i].cost = cost[i];
554  }
555 
556  /* make sure TopLeaves are sorted by Key for locality of segments -
557  * likely not necessary because when this function
558  * is called it is already true */
559  qsort_openmp(TopLeafExt, ddecomp->NTopLeaves, sizeof(TopLeafExt[0]), topleaf_ext_order_by_key);
560 
561  int64_t totalcost = 0;
562  #pragma omp parallel for reduction(+ : totalcost)
563  for(i = 0; i < ddecomp->NTopLeaves; i ++) {
564  totalcost += TopLeafExt[i].cost;
565  }
566  int64_t totalcostLeft = totalcost;
567 
568  /* start the assignment; We try to create segments that are of the
569  * mean_expected cost, then assign them to Tasks in a round-robin fashion.
570  * The default is that there is one segment per task, which makes the assignment trivial.
571  */
572 
573  double mean_expected = 1.0 * totalcost / Nsegment;
574  double mean_task = 1.0 * totalcost /NTask;
575  int curleaf = 0;
576  int curseg = 0;
577  int curtask = 0; /* between 0 and NTask - 1*/
578  int nrounds = 0; /*Number of times we looped*/
579  int64_t curload = 0; /* cumulative load for the current segment */
580  int64_t curtaskload = 0; /* cumulative load for current task */
581  int64_t maxleafcost = 0;
582 
583  message(0, "Expected segment cost %g\n", mean_expected);
584  /* we maintain that after the loop curleaf is the number of leaves scanned,
585  * curseg is the number of segments created.
586  * */
587  while(nrounds < ddecomp->NTopLeaves) {
588  int append = 0;
589  int advance = 0;
590  if(curleaf == ddecomp->NTopLeaves) {
591  /* to maintain the invariant */
592  advance = 1;
593  } else if(ddecomp->NTopLeaves - curleaf == Nsegment - curseg) {
594  /* exactly one leaf left per remaining segment; this branch ensures
595  * at least Nsegment segments are created. */
596  append = 1;
597  advance = 1;
598  } else {
599  /* append a leaf to the segment if there is room left.
600  * Calculate room left based on a rolling average of the total so far. */
601  int64_t totalassigned = (totalcost - totalcostLeft) + curload;
602  if((mean_expected * (curseg +1) - totalassigned > 0.5 * TopLeafExt[curleaf].cost)
603  || curload == 0 /* but at least add one leaf */
604  ) {
605  append = 1;
606  } else {
607  /* will be too big of a segment, cut it */
608  advance = 1;
609  }
610  }
611  if(append) {
612  /* assign the leaf to the task */
613  curload += TopLeafExt[curleaf].cost;
614  TopLeafExt[curleaf].Task = curtask;
615  curleaf ++;
616  }
617 
618  if(advance) {
619  /*Add this segment to the current task*/
620  curtaskload += curload;
621  /* If we have allocated enough segments to fill this processor,
622  * or if we have one segment per task left, proceed to the next task.
623  * We do not use round robin so that neighbouring (by key)
624  * topTree nodes are on the same processor.*/
625  if((mean_task - curtaskload < 0.5 * mean_expected) || (Nsegment - curseg <= NTask - curtask)
626  ){
627  curtaskload = 0;
628  curtask ++;
629  }
630  /* move on to the next segment.*/
631  totalcostLeft -= curload;
632  curload = 0;
633  curseg ++;
634 
635  /* finished a round for all tasks */
636  if(curtask == NTask) {
637  /*Back to task zero*/
638  curtask = 0;
639  /* Need a new mean_expected value: we want to
640  * divide the remaining segments evenly between the processors.*/
641  mean_expected = 1.0 * totalcostLeft / Nsegment;
642  mean_task = 1.0 * totalcostLeft / NTask;
643  nrounds++;
644  }
645  if(curleaf == ddecomp->NTopLeaves)
646  break;
647  if(TopLeafExt[curleaf].cost > maxleafcost)
648  maxleafcost = TopLeafExt[curleaf].cost;
649  }
650  //message(0, "curleaf = %d advance = %d append = %d, curload = %d cost=%ld left=%ld\n", curleaf, advance, append, curload, TopLeafExt[curleaf].cost, totalcostLeft);
651  }
652 
653  /*In most 'normal' cases nrounds == 1 here*/
654  message(0, "Created %d segments in %d rounds. Max leaf cost: %g\n", curseg, nrounds, (1.0*maxleafcost)/(totalcost) * Nsegment);
655 
656  if(curseg < Nsegment) {
657  endrun(0, "Not enough segments were created (%d instead of %d). This should not happen.\n", curseg, Nsegment);
658  }
659 
660  if(totalcostLeft != 0) {
661  endrun(0, "Assertion failed. Total cost is not fully assigned to all ranks\n");
662  }
663 
664  /* lets rearrange the TopLeafExt by task, such that we can build the Tasks table */
665  qsort_openmp(TopLeafExt, ddecomp->NTopLeaves, sizeof(TopLeafExt[0]), topleaf_ext_order_by_task_and_key);
666  for(i = 0; i < ddecomp->NTopLeaves; i ++) {
667  ddecomp->TopNodes[TopLeafExt[i].topnode].Leaf = i;
668  ddecomp->TopLeaves[i].Task = TopLeafExt[i].Task;
669  ddecomp->TopLeaves[i].topnode = TopLeafExt[i].topnode;
670  }
671 
672  myfree(TopLeafExt);
673  /* here we reduce the number of code branches by adding an item to the end. */
674  ddecomp->TopLeaves[ddecomp->NTopLeaves].Task = NTask;
675  ddecomp->TopLeaves[ddecomp->NTopLeaves].topnode = -1;
676 
677  int ta = 0;
678  ddecomp->Tasks[ta].StartLeaf = 0;
679  for(i = 0; i <= ddecomp->NTopLeaves; i ++) {
680 
681  if(ddecomp->TopLeaves[i].Task == ta) continue;
682 
683  ddecomp->Tasks[ta].EndLeaf = i;
684  ta ++;
685  while(ta < ddecomp->TopLeaves[i].Task) {
686  ddecomp->Tasks[ta].EndLeaf = i;
687  ddecomp->Tasks[ta].StartLeaf = i;
688  ta ++;
689  }
690  /* the last item will set Tasks[NTask], but we allocated memory for it already */
691  ddecomp->Tasks[ta].StartLeaf = i;
692  }
693  if(ta != NTask) {
694  endrun(0, "Assertion failed: not all tasks are assigned. This indicates a bug.\n");
695  }
696 }
static int topleaf_ext_order_by_task_and_key(const void *c1, const void *c2)
Definition: domain.c:498
static int topleaf_ext_order_by_key(const void *c1, const void *c2)
Definition: domain.c:510
void endrun(int where, const char *fmt,...)
Definition: endrun.c:147
#define myfree(x)
Definition: mymalloc.h:19
int NTopLeaves
Definition: domain.h:40
int EndLeaf
Definition: domain.h:30
int StartLeaf
Definition: domain.h:29
int topnode
Definition: domain.h:23
int Task
Definition: domain.h:21
peano_t Key
Definition: domain.c:491
int64_t cost
Definition: domain.c:494
peano_t StartKey
Definition: domain.h:14
int Leaf
Definition: domain.h:17
#define qsort_openmp
Definition: test_exchange.c:14

References topleaf_extdata::cost, DomainDecomp::DomainComm, task_data::EndLeaf, endrun(), topleaf_extdata::Key, topnode_data::Leaf, message(), myfree, mymalloc, NTask, DomainDecomp::NTopLeaves, qsort_openmp, topnode_data::StartKey, task_data::StartLeaf, topleaf_extdata::Task, topleaf_data::Task, DomainDecomp::Tasks, topleaf_ext_order_by_key(), topleaf_ext_order_by_task_and_key(), DomainDecomp::TopLeaves, topleaf_extdata::topnode, topleaf_data::topnode, and DomainDecomp::TopNodes.

Referenced by domain_balance().


◆ domain_attempt_decompose()

static int domain_attempt_decompose ( DomainDecomp ddecomp,
DomainDecompositionPolicy policy,
const int  MaxTopNodes 
)
static

This function carries out the actual domain decomposition for all particle types. It will try to balance the work-load for each domain, as estimated from the P[i].GravCost values. The decomposition will respect the maximum allowed memory imbalance given by the value of PartAllocFactor.

Definition at line 338 of file domain.c.

339 {
340  int i;
341 
342  size_t bytes, all_bytes = 0;
343 
344  /* points to the root node of the top-level tree */
345  struct local_topnode_data *topTree = (struct local_topnode_data *) mymalloc("LocaltopTree", bytes =
346  (MaxTopNodes * sizeof(struct local_topnode_data)));
347  memset(topTree, 0, sizeof(topTree[0]) * MaxTopNodes);
348  all_bytes += bytes;
349 
350  message(0, "use of %g MB of temporary storage for domain decomposition... (presently allocated=%g MB)\n",
351  all_bytes / (1024.0 * 1024.0), mymalloc_usedbytes() / (1024.0 * 1024.0));
352 
353  report_memory_usage("DOMAIN");
354 
355  walltime_measure("/Domain/Decompose/Misc");
356 
357  if(domain_determine_global_toptree(policy, topTree, &ddecomp->NTopNodes, MaxTopNodes, ddecomp->DomainComm)) {
358  myfree(topTree);
359  return 1;
360  }
361 
362  /* copy what we need for the topnodes */
363  for(i = 0; i < ddecomp->NTopNodes; i++)
364  {
365  ddecomp->TopNodes[i].StartKey = topTree[i].StartKey;
366  ddecomp->TopNodes[i].Shift = topTree[i].Shift;
367  ddecomp->TopNodes[i].Daughter = topTree[i].Daughter;
368  ddecomp->TopNodes[i].Leaf = -1; /* will be assigned by create_topleaves*/
369  }
370 
371  myfree(topTree);
372 
373  ddecomp->NTopLeaves = 0;
374  domain_create_topleaves(ddecomp, 0, &ddecomp->NTopLeaves);
375 
376  message(0, "NTopLeaves= %d NTopNodes=%d (space for %d)\n", ddecomp->NTopLeaves, ddecomp->NTopNodes, MaxTopNodes);
377 
378  walltime_measure("/Domain/DetermineTopTree/CreateLeaves");
379 
380  if(ddecomp->NTopLeaves < policy->NTopLeaves) {
381  message(0, "Number of Topleaves is less than required over decomposition");
382  }
383 
384  int NTask;
385  MPI_Comm_size(ddecomp->DomainComm, &NTask);
386 
387  /* this is fatal */
388  if(ddecomp->NTopLeaves < NTask) {
389  endrun(0, "Number of Topleaves is less than NTask");
390  }
391 
392  return 0;
393 }
static int domain_determine_global_toptree(DomainDecompositionPolicy *policy, struct local_topnode_data *topTree, int *topTreeSize, const int MaxTopNodes, MPI_Comm DomainComm)
Definition: domain.c:1147
static void domain_create_topleaves(DomainDecomp *ddecomp, int no, int *next)
Definition: domain.c:721
#define mymalloc_usedbytes()
Definition: mymalloc.h:32
#define report_memory_usage(x)
Definition: mymalloc.h:30
int NTopNodes
Definition: domain.h:39
peano_t StartKey
Definition: domain.c:62
int Daughter
Definition: domain.h:15
int Shift
Definition: domain.h:16
#define walltime_measure(name)
Definition: walltime.h:8

References local_topnode_data::Daughter, topnode_data::Daughter, domain_create_topleaves(), domain_determine_global_toptree(), DomainDecomp::DomainComm, endrun(), topnode_data::Leaf, message(), myfree, mymalloc, mymalloc_usedbytes, NTask, DomainDecompositionPolicy::NTopLeaves, DomainDecomp::NTopLeaves, DomainDecomp::NTopNodes, report_memory_usage, local_topnode_data::Shift, topnode_data::Shift, local_topnode_data::StartKey, topnode_data::StartKey, DomainDecomp::TopNodes, and walltime_measure.

Referenced by domain_decompose_full().


◆ domain_balance()

static int domain_balance ( DomainDecomp ddecomp)
static

attempt to assign segments to tasks such that the load or work is balanced.

a table that gives the total number of particles held by each processor

Definition at line 400 of file domain.c.

401 {
403  int64_t * TopLeafCount = (int64_t *) mymalloc("TopLeafCount", ddecomp->NTopLeaves * sizeof(TopLeafCount[0]));
404 
405  domain_compute_costs(ddecomp, NULL, TopLeafCount);
406 
407  walltime_measure("/Domain/Decompose/Sumcost");
408 
409  /* first try work balance */
410  domain_assign_balanced(ddecomp, TopLeafCount, 1);
411 
412  walltime_measure("/Domain/Decompose/assignbalance");
413 
414  int status = domain_check_memory_bound(ddecomp, NULL, TopLeafCount);
415  if(status != 0)
416  message(0, "Domain decomposition is outside memory bounds.\n");
417 
418  walltime_measure("/Domain/Decompose/memorybound");
419 
420  myfree(TopLeafCount);
421 
422  return status;
423 }
static int domain_check_memory_bound(const DomainDecomp *ddecomp, int64_t *TopLeafWork, int64_t *TopLeafCount)
Definition: domain.c:426
static void domain_compute_costs(const DomainDecomp *ddecomp, int64_t *TopLeafWork, int64_t *TopLeafCount)
Definition: domain.c:1280
static void domain_assign_balanced(DomainDecomp *ddecomp, int64_t *cost, const int NsegmentPerTask)
Definition: domain.c:533

References domain_assign_balanced(), domain_check_memory_bound(), domain_compute_costs(), message(), myfree, mymalloc, DomainDecomp::NTopLeaves, and walltime_measure.

Referenced by domain_decompose_full().


◆ domain_check_for_local_refine_subsample()

static int domain_check_for_local_refine_subsample ( DomainDecompositionPolicy policy,
struct local_topnode_data topTree,
int *  topTreeSize,
const int  MaxTopNodes 
)
static

This function performs local refinement of the topTree.

It creates the local refinement by quickly building a skeleton tree from a fraction (a subsample) of all local particles.

Next, we add flesh to the skeleton: all particles are deposited into the tree.

Finally, in domain_toptree_truncate(), we chop off the cheap branches from the skeleton, terminating a branch when its cost and count are both sufficiently small.

We do not use the full particle data; it is sufficient to use LP, which contains the Peano key and cost of each particle, sorted by the Peano key.
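A standalone sketch of the subsampling step (the key values are made up): once the keys are sorted, every SubSampleDistance-th entry is compacted to the front of the array, which keeps the sample spatially even along the curve:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t key[12] = {1, 2, 4, 5, 8, 9, 12, 20, 21, 30, 31, 40};  /* sorted */
    int n = 12, stride = 4;        /* stride stands in for SubSampleDistance */
    int nsample = n / stride;
    if(nsample == 0 && n != 0)
        nsample = 1;               /* same guard as in the source */
    int i;
    for(i = 0; i < nsample; i++)
        key[i] = key[i * stride];  /* in-place compaction, as done with LP[] */
    for(i = 0; i < nsample; i++)
        printf("%lu ", (unsigned long) key[i]);   /* prints: 1 8 21 */
    printf("\n");
    return 0;
}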

Definition at line 895 of file domain.c.

899 {
900 
901  int i;
902 
903  struct local_particle_data * LP = (struct local_particle_data*) mymalloc("LocalParticleData", PartManager->NumPart * sizeof(LP[0]));
904 
905  /* Watch out: Peano/Morton ordering is required by the tree
906  * building algorithm in local_refine.
907  *
908  * We can either use a global or a local sorting here; the code will run
909  * without crashing.
910  *
911  * A global sorting is chosen to ensure the local topTrees are really local
912  * and the leaves almost disjoint. This makes the merged topTree a more accurate
913  * representation of the true cost / load distribution, since merging
914  * and secondary refinement are approximate.
915  *
916  * A local sorting may be faster but makes the tree less accurate because
917  * overlapping local topTrees become more likely.
918  * */
919 
920  int Nsample = PartManager->NumPart / policy->SubSampleDistance;
921 
922  if(Nsample == 0 && PartManager->NumPart != 0) Nsample = 1;
923 
924 #pragma omp parallel for
925  for(i = 0; i < PartManager->NumPart; i ++)
926  {
927  LP[i].Key = P[i].Key;
928  LP[i].Cost = 1;
929  }
930 
931  /* First sort to ensure spatially 'even' subsamples; FIXME: This can probably
932  * be omitted in most cases. Usually the particles in memory won't be very far off
933  * from a peano order. */
934  if(policy->PreSort)
935  qsort_openmp(LP, PartManager->NumPart, sizeof(struct local_particle_data), order_by_key);
936 
937  /* now subsample */
938  for(i = 0; i < Nsample; i ++)
939  {
940  LP[i].Key = LP[i * policy->SubSampleDistance].Key;
941  LP[i].Cost = LP[i * policy->SubSampleDistance].Cost;
942  }
943 
944  if(policy->UseGlobalSort) {
945  mpsort_mpi(LP, Nsample, sizeof(struct local_particle_data), mp_order_by_key, 8, NULL, policy->GlobalSortComm);
946  } else {
947  qsort_openmp(LP, Nsample, sizeof(struct local_particle_data), order_by_key);
948  }
949 
950  walltime_measure("/Domain/DetermineTopTree/Sort");
951 
952  *topTreeSize = 1;
953  topTree[0].Daughter = -1;
954  topTree[0].Parent = -1;
955  topTree[0].Shift = BITS_PER_DIMENSION * 3;
956  topTree[0].StartKey = 0;
957  topTree[0].Count = 0;
958  topTree[0].Cost = 0;
959 
960  /* The tree building algorithm here requires the LP structure to
961  * be sorted by key, in which case we either refine
962  * the node the last particle lives in, or jump into a new empty node,
963  * as the particles are scanned through.
964  *
965  * I unfortunately cannot find the direct reference of this with
966  * the proof. I found it around 2012~2013 when writing psphray2
967  * and didn't cite it properly then!
968  *
969  * Here is a blog link that is related:
970  *
971  * https://devblogs.nvidia.com/parallelforall/thinking-parallel-part-iii-tree-construction-gpu/
972  *
973  * A few interesting papers that may make this faster and cheaper.
974  *
975  * http://sc07.supercomputing.org/schedule/pdf/pap117.pdf
976  *
977  * */
978 
979  /* 1. create a skeleton of the toptree.
980  * We do not use all particles to avoid excessive memory consumption.
981  *
982  * */
983 
984 
985  /* During the loop, topTree[?].Count is a flag to indicate if
986  * the node has been finished. We either refine the last leaf node
987  * or create a new leaf because of the peano/morton sorting.
988  * */
989  peano_t last_key = -1;
990  int last_leaf = -1;
991  i = 0;
992  while(i < Nsample) {
993 
994  int leaf = domain_toptree_get_subnode(topTree, LP[i].Key);
995 
996  if (leaf == last_leaf && topTree[leaf].Shift >= 3) {
997  /* two particles in a node? need refinement if possible. */
998  if(0 != domain_toptree_split(topTree, topTreeSize, MaxTopNodes, leaf)) {
999  /* out of memory, retry */
1000  myfree(LP);
1001  return 1;
1002  }
1003  /* pop the last particle and reinsert it */
1004  topTree[leaf].Count = 0;
1005 
1006  last_leaf = domain_toptree_insert(topTree, last_key, 0);
1007  /* retry the current particle. */
1008  continue;
1009  } else {
1010  if(topTree[leaf].Count != 0 && leaf != last_leaf) {
1011  /* meeting a node that's already been finished ?
1012  * This shall not happen when the keys are
1013  * already sorted.
1014  *
1015  * If the Key hasn't changed sufficiently with this new particle we will see the last leaf again.
1016  * Normally we would refine, but if Shift == 0 we don't have space.
1017  * In this case we just add the current particle sample to the last toptree node.
1018  */
1019  endrun(10, "toptree[%d].Count=%d, shift %d, last_leaf=%d key = %ld i= %d Nsample = %d\n",
1020  leaf, topTree[leaf].Count, topTree[leaf].Shift, last_leaf,LP[i].Key, i, Nsample);
1021  }
1022  /* this will create a new node. */
1023  last_key = LP[i].Key;
1024  last_leaf = domain_toptree_insert(topTree, last_key, 0);
1025  i += policy->SubSampleDistance;
1026  continue;
1027  }
1028  }
1029  /* Alternatively we could have kept last_cost in the above loop and avoided
1030  * the second and third steps: */
1031 
1032  /* 2. Remove the skeleton particles from the tree to make it a skeleton;
1033  * note that we never bothered to add Cost when the skeleton was built,
1034  * otherwise we would have to clean it here too.*/
1035  for(i = 0; i < *topTreeSize; i ++ ) {
1036  topTree[i].Count = 0;
1037  }
1038 
1039  /* 3. insert all particles to the skeleton tree; Count will be correct because we cleaned them.*/
1040  for(i = 0; i < Nsample; i ++ ) {
1041  domain_toptree_insert(topTree, LP[i].Key, LP[i].Cost);
1042  }
1043 
1044  myfree(LP);
1045 
1046  /* then compute the costs of the internal nodes. */
1047  domain_toptree_update_cost(topTree, 0);
1048 
1049  /* we leave truncation to another function, since costlimit and countlimit must be
1050  * used in the secondary refinement*/
1051  return 0;
1052 }
static void mp_order_by_key(const void *data, void *radix, void *arg)
Definition: domain.c:1472
static int order_by_key(const void *a, const void *b)
Definition: domain.c:1458
static int domain_toptree_insert(struct local_topnode_data *topTree, peano_t Key, int64_t cost)
Definition: domain.c:749
static void domain_toptree_update_cost(struct local_topnode_data *topTree, int start)
Definition: domain.c:796
static int domain_toptree_get_subnode(struct local_topnode_data *topTree, peano_t key)
Definition: domain.c:738
static int domain_toptree_split(struct local_topnode_data *topTree, int *topTreeSize, const int MaxTopNodes, int i)
Definition: domain.c:760
#define mpsort_mpi(base, nmemb, elsize, radix, rsize, arg, comm)
Definition: mpsort.h:26
#define P
Definition: partmanager.h:88
uint64_t peano_t
Definition: peano.h:7
#define BITS_PER_DIMENSION
Definition: peano.h:9
MPI_Comm GlobalSortComm
Definition: domain.c:52
int64_t Cost
Definition: domain.c:74
int64_t Count
Definition: domain.c:67
int64_t Cost
Definition: domain.c:68

References BITS_PER_DIMENSION, local_topnode_data::Cost, local_particle_data::Cost, local_topnode_data::Count, local_topnode_data::Daughter, domain_toptree_get_subnode(), domain_toptree_insert(), domain_toptree_split(), domain_toptree_update_cost(), endrun(), DomainDecompositionPolicy::GlobalSortComm, local_particle_data::Key, mp_order_by_key(), mpsort_mpi, myfree, mymalloc, part_manager_type::NumPart, order_by_key(), P, local_topnode_data::Parent, PartManager, DomainDecompositionPolicy::PreSort, qsort_openmp, local_topnode_data::Shift, local_topnode_data::StartKey, DomainDecompositionPolicy::SubSampleDistance, DomainDecompositionPolicy::UseGlobalSort, and walltime_measure.

Referenced by domain_determine_global_toptree().


◆ domain_check_memory_bound()

static int domain_check_memory_bound ( const DomainDecomp ddecomp,
int64_t *  TopLeafWork,
int64_t *  TopLeafCount 
)
static

Definition at line 426 of file domain.c.

427 {
428  int NTask;
429  MPI_Comm_size(ddecomp->DomainComm, &NTask);
430 
431  /*Only used if the memory bound is not met */
432  int64_t * list_load = ta_malloc("list_load",int64_t, NTask);
433  int64_t * list_work = ta_malloc("list_work",int64_t, NTask);
434 
435  int64_t max_work = 0, max_load = 0, sumload = 0, sumwork = 0;
436  int ta;
437 
438  #pragma omp parallel for reduction(+: sumwork) reduction(+: sumload) reduction(max: max_load) reduction(max:max_work)
439  for(ta = 0; ta < NTask; ta++)
440  {
441  int64_t load = 0;
442  int64_t work = 0;
443  int i;
444  for(i = ddecomp->Tasks[ta].StartLeaf; i < ddecomp->Tasks[ta].EndLeaf; i ++)
445  {
446  load += TopLeafCount[i];
447  if(TopLeafWork)
448  work += TopLeafWork[i];
449  }
450 
451  list_load[ta] = load;
452  list_work[ta] = work;
453 
454  sumwork += work;
455  sumload += load;
456 
457  if(load > max_load)
458  max_load = load;
459  if(work > max_work)
460  max_work = work;
461  }
462 
463  if(TopLeafWork)
464  message(0, "Largest load: work=%g particle=%g\n",
465  max_work / ((double)sumwork / NTask), max_load / (((double) sumload) / NTask));
466  else
467  message(0, "Largest particle load particle=%g\n", max_load / (((double) sumload) / NTask));
468 
469  /*Leave a small number of particles for star formation */
470  if(max_load > PartManager->MaxPart * domain_params.SetAsideFactor)
471  {
472  message(0, "desired memory imbalance=%g (limit=%g, needed=%ld)\n",
473  (max_load * ((double) sumload ) / NTask ) / PartManager->MaxPart, domain_params.SetAsideFactor * PartManager->MaxPart, max_load);
474  message(0, "Balance breakdown:\n");
475  int i;
476  for(i = 0; i < NTask; i++)
477  {
478  message(0, "Task: [%3d] work=%8.4f particle load=%8.4f\n", i,
479  list_work[i] / ((double) sumwork / NTask), list_load[i] / (((double) sumload) / NTask));
480  }
481  return 1;
482  }
483  ta_free(list_work);
484  ta_free(list_load);
485  return 0;
486 }
static DomainParams domain_params
Definition: domain.c:43
#define ta_malloc(name, type, nele)
Definition: mymalloc.h:25
#define ta_free(p)
Definition: mymalloc.h:28
double SetAsideFactor
Definition: domain.h:59

References domain_params, DomainDecomp::DomainComm, part_manager_type::MaxPart, message(), NTask, PartManager, DomainParams::SetAsideFactor, task_data::StartLeaf, ta_free, ta_malloc, and DomainDecomp::Tasks.

Referenced by domain_balance().


◆ domain_compute_costs()

static void domain_compute_costs ( const DomainDecomp ddecomp,
int64_t *  TopLeafWork,
int64_t *  TopLeafCount 
)
static

Definition at line 1280 of file domain.c.

1281 {
1282  int i;
1283  int NumThreads = omp_get_max_threads();
1284  int64_t * local_TopLeafWork = NULL;
1285  if(TopLeafWork) {
1286  local_TopLeafWork = (int64_t *) mymalloc("local_TopLeafWork", NumThreads * ddecomp->NTopLeaves * sizeof(local_TopLeafWork[0]));
1287  memset(local_TopLeafWork, 0, NumThreads * ddecomp->NTopLeaves * sizeof(local_TopLeafWork[0]));
1288  }
1289  int64_t * local_TopLeafCount = (int64_t *) mymalloc("local_TopLeafCount", NumThreads * ddecomp->NTopLeaves * sizeof(local_TopLeafCount[0]));
1290  memset(local_TopLeafCount, 0, NumThreads * ddecomp->NTopLeaves * sizeof(local_TopLeafCount[0]));
1291 
1292 #pragma omp parallel
1293  {
1294  int tid = omp_get_thread_num();
1295  int n;
1296 
1297  #pragma omp for
1298  for(n = 0; n < PartManager->NumPart; n++)
1299  {
1300  /* Skip garbage particles: they have zero work
1301  * and can be removed by exchange if under memory pressure.*/
1302  if(P[n].IsGarbage)
1303  continue;
1304  int no = domain_get_topleaf(P[n].Key, ddecomp);
1305 
1306  if(local_TopLeafWork)
1307  local_TopLeafWork[no + tid * ddecomp->NTopLeaves] += 1;
1308 
1309  local_TopLeafCount[no + tid * ddecomp->NTopLeaves] += 1;
1310  }
1311  }
1312 
1313 #pragma omp parallel for
1314  for(i = 0; i < ddecomp->NTopLeaves; i++)
1315  {
1316  int tid;
1317  if(local_TopLeafWork)
1318  for(tid = 1; tid < NumThreads; tid++) {
1319  local_TopLeafWork[i] += local_TopLeafWork[i + tid * ddecomp->NTopLeaves];
1320  }
1321 
1322  for(tid = 1; tid < NumThreads; tid++) {
1323  local_TopLeafCount[i] += local_TopLeafCount[i + tid * ddecomp->NTopLeaves];
1324  }
1325  }
1326 
1327  if(local_TopLeafWork) {
1328  MPI_Allreduce(local_TopLeafWork, TopLeafWork, ddecomp->NTopLeaves, MPI_INT64, MPI_SUM, ddecomp->DomainComm);
1329  myfree(local_TopLeafWork);
1330  }
1331 
1332  MPI_Allreduce(local_TopLeafCount, TopLeafCount, ddecomp->NTopLeaves, MPI_INT64, MPI_SUM, ddecomp->DomainComm);
1333  myfree(local_TopLeafCount);
1334 }
static int domain_get_topleaf(const peano_t key, const DomainDecomp *ddecomp)
Definition: domain.h:74
#define MPI_INT64
Definition: system.h:12

References domain_get_topleaf(), DomainDecomp::DomainComm, MPI_INT64, myfree, mymalloc, DomainDecomp::NTopLeaves, part_manager_type::NumPart, P, and PartManager.

Referenced by domain_balance().


◆ domain_create_topleaves()

static void domain_create_topleaves ( DomainDecomp ddecomp,
int  no,
int *  next 
)
static

This function walks the global top tree in order to establish the number of leaves it has. These leaves are then distributed to different processors.

The pointer next points to the next free item in the TopLeaves array.
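Typical usage, mirroring lines 373-374 of domain.c as shown in domain_attempt_decompose above: start at the root node with the leaf counter zeroed.

/* Number all leaves of the top-level tree, starting from root node 0. */
ddecomp->NTopLeaves = 0;
domain_create_topleaves(ddecomp, 0, &ddecomp->NTopLeaves);
/* On return, ddecomp->NTopLeaves holds the total number of leaves. */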

Definition at line 721 of file domain.c.

722 {
723  int i;
724  if(ddecomp->TopNodes[no].Daughter == -1)
725  {
726  ddecomp->TopNodes[no].Leaf = *next;
727  ddecomp->TopLeaves[*next].topnode = no;
728  (*next)++;
729  }
730  else
731  {
732  for(i = 0; i < 8; i++)
733  domain_create_topleaves(ddecomp, ddecomp->TopNodes[no].Daughter + i, next);
734  }
735 }

References topnode_data::Daughter, topnode_data::Leaf, DomainDecomp::TopLeaves, topleaf_data::topnode, and DomainDecomp::TopNodes.

Referenced by domain_attempt_decompose().


◆ domain_decompose_full()

void domain_decompose_full ( DomainDecomp ddecomp)

This is the main routine for the domain decomposition. It acts as a driver that allocates various temporary buffers, maps the particles back onto the periodic box if needed, performs the domain decomposition, and finally puts all particles in Peano-Hilbert order as a tuning measure.

Definition at line 155 of file domain.c.

156 {
157  static DomainDecompositionPolicy policies[16];
158  static int Npolicies = 0;
159 
160  /* start from last successful policy to avoid retries */
161  static int LastSuccessfulPolicy = 0;
162 
163  if (Npolicies == 0) {
164  const int NincreaseAlloc = 16;
165  Npolicies = domain_policies_init(policies, NincreaseAlloc, 4);
166  }
167 
168  walltime_measure("/Misc");
169 
170  message(0, "domain decomposition... (presently allocated=%g MB)\n", mymalloc_usedbytes() / (1024.0 * 1024.0));
171 
172  int decompose_failed = 1;
173  int i;
174  for(i = LastSuccessfulPolicy; i < Npolicies; i ++)
175  {
176  domain_free(ddecomp);
177 
178 #ifdef DEBUG
179  domain_test_id_uniqueness(PartManager);
180 #endif
181  int MaxTopNodes = domain_allocate(ddecomp, &policies[i]);
182 
183  message(0, "Attempting new domain decomposition policy: TopNodeAllocFactor=%g, UseglobalSort=%d, SubSampleDistance=%d UsePreSort=%d\n",
184  policies[i].TopNodeAllocFactor, policies[i].UseGlobalSort, policies[i].SubSampleDistance, policies[i].PreSort);
185 
186  decompose_failed = domain_attempt_decompose(ddecomp, &policies[i], MaxTopNodes);
187  decompose_failed = MPIU_Any(decompose_failed, ddecomp->DomainComm);
188 
189  if(decompose_failed)
190  continue;
191 
192  LastSuccessfulPolicy = i;
193  if(domain_balance(ddecomp) == 0)
194  break;
195  }
196 
197  if(decompose_failed) {
198  endrun(0, "No suitable domain decomposition policy worked for this particle distribution\n");
199  }
200 
201  /* copy the used nodes from the temporary arrays to the final ones. */
202  struct topleaf_data * OldTopLeaves = ddecomp->TopLeaves;
203  struct topnode_data * OldTopNodes = ddecomp->TopNodes;
204 
205  ddecomp->TopNodes = (struct topnode_data *) mymalloc2("TopNodes", sizeof(ddecomp->TopNodes[0]) * ddecomp->NTopNodes);
206  /* add 1 extra to mark the end of TopLeaves; see assign */
207  ddecomp->TopLeaves = (struct topleaf_data *) mymalloc2("TopLeaves", sizeof(ddecomp->TopLeaves[0]) * (ddecomp->NTopLeaves + 1));
208 
209  memcpy(ddecomp->TopLeaves, OldTopLeaves, ddecomp->NTopLeaves* sizeof(ddecomp->TopLeaves[0]));
210  memcpy(ddecomp->TopNodes, OldTopNodes, ddecomp->NTopNodes * sizeof(ddecomp->TopNodes[0]));
211 
212  /* no longer useful */
213  myfree(OldTopLeaves);
214  myfree(OldTopNodes);
215 
216  if(domain_exchange(domain_layoutfunc, ddecomp, 0, NULL, PartManager, SlotsManager, 10000, ddecomp->DomainComm))
217  endrun(1929,"Could not exchange particles\n");
218 
219  /*Do a garbage collection so that the slots are ordered
220  *the same as the particles, garbage is at the end and all particles are in peano order.*/
221  slots_gc_sorted(PartManager, SlotsManager);
222 
223  /*Ensure collective*/
224  MPIU_Barrier(ddecomp->DomainComm);
225  message(0, "Domain decomposition done.\n");
226 
227  report_memory_usage("DOMAIN");
228 
229  walltime_measure("/Domain/PeanoSort");
230 }
void domain_free(DomainDecomp *ddecomp)
Definition: domain.c:320
static int domain_balance(DomainDecomp *ddecomp)
Definition: domain.c:400
static int domain_attempt_decompose(DomainDecomp *ddecomp, DomainDecompositionPolicy *policy, const int MaxTopNodes)
Definition: domain.c:338
static int domain_layoutfunc(int n, const void *userdata)
Definition: domain.c:707
static int domain_allocate(DomainDecomp *ddecomp, DomainDecompositionPolicy *policy)
Definition: domain.c:285
static int domain_policies_init(DomainDecompositionPolicy policies[], const int NincreaseAlloc, const int SwitchToGlobal)
Definition: domain.c:252
void domain_test_id_uniqueness(struct part_manager_type *pman)
Definition: exchange.c:570
int domain_exchange(ExchangeLayoutFunc layoutfunc, const void *layout_userdata, int do_gc, struct DriftData *drift, struct part_manager_type *pman, struct slots_manager_type *sman, int maxiter, MPI_Comm Comm)
Definition: exchange.c:103
struct slots_manager_type SlotsManager[1]
Definition: slotsmanager.c:7
void slots_gc_sorted(struct part_manager_type *pman, struct slots_manager_type *sman)
Definition: slotsmanager.c:442
int MPIU_Any(int condition, MPI_Comm comm)
Definition: system.c:545
#define MPIU_Barrier(comm)
Definition: system.h:103

References domain_allocate(), domain_attempt_decompose(), domain_balance(), domain_exchange(), domain_free(), domain_layoutfunc(), domain_policies_init(), domain_test_id_uniqueness(), DomainDecomp::DomainComm, endrun(), message(), MPIU_Any(), MPIU_Barrier, myfree, mymalloc2, mymalloc_usedbytes, DomainDecomp::NTopLeaves, DomainDecomp::NTopNodes, PartManager, report_memory_usage, slots_gc_sorted(), SlotsManager, DomainDecomp::TopLeaves, DomainDecomp::TopNodes, and walltime_measure.

Referenced by begrun(), do_force_test(), domain_maintain(), run(), run_gravity_test(), runfof(), runpower(), and test_fof().


◆ domain_determine_global_toptree()

int domain_determine_global_toptree ( DomainDecompositionPolicy policy,
struct local_topnode_data topTree,
int *  topTreeSize,
int  MaxTopNodes,
MPI_Comm  DomainComm 
)
static

This function constructs the global top-level tree that is used for the domain decomposition. This is done by considering the string of Peano-Hilbert keys of all particles, which is recursively chopped into pieces of eight segments until each segment holds at most a certain number of particles.
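To make the key arithmetic concrete, here is a standalone sketch (the key value and the 10-bit BITS stand-in for BITS_PER_DIMENSION are assumed) of how a Peano-Hilbert key is consumed three bits per level, matching the Shift/StartKey bookkeeping of domain_toptree_get_subnode below:

#include <stdio.h>
#include <stdint.h>

#define BITS 10    /* stands in for BITS_PER_DIMENSION */

int main(void)
{
    uint64_t key = 0x2ABCDEFull;   /* an arbitrary 30-bit key */
    uint64_t startkey = 0;
    int shift = 3 * BITS;          /* the root covers the whole key space */
    while(shift > 0) {
        /* child j of a node covers keys
         * [StartKey + j*2^(Shift-3), StartKey + (j+1)*2^(Shift-3)) */
        int sub = (int) ((key - startkey) >> (shift - 3));
        printf("shift=%2d -> child %d\n", shift, sub);
        startkey += (uint64_t) sub << (shift - 3);
        shift -= 3;
    }
    return 0;
}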

Definition at line 1147 of file domain.c.

1149 {
1150  walltime_measure("/Domain/DetermineTopTree/Misc");
1151 
1152  /*
1153  * Build local refinement with a subsample of particles;
1154  * 1/16 is used because each local topTree node takes about 32 bytes.
1155  **/
1156 
1157  int local_refine_failed = domain_check_for_local_refine_subsample(policy, topTree, topTreeSize, MaxTopNodes);
1158 
1159  walltime_measure("/Domain/DetermineTopTree/LocalRefine/Init");
1160 
1161  if(MPIU_Any(local_refine_failed, DomainComm)) {
1162  message(0, "We are out of Topnodes. \n");
1163  return 1;
1164  }
1165 
1166  int64_t TotCost = 0, TotCount = 0;
1167  int64_t costlimit, countlimit;
1168 
1169  MPI_Allreduce(&topTree[0].Cost, &TotCost, 1, MPI_INT64, MPI_SUM, DomainComm);
1170  MPI_Allreduce(&topTree[0].Count, &TotCount, 1, MPI_INT64, MPI_SUM, DomainComm);
1171 
1172  costlimit = TotCost / (policy->NTopLeaves);
1173  countlimit = TotCount / (policy->NTopLeaves);
1174 
1175  domain_toptree_truncate(topTree, topTreeSize, countlimit, costlimit);
1176 
1177  walltime_measure("/Domain/DetermineTopTree/LocalRefine/truncate");
1178 
1179  if(*topTreeSize > 10 * policy->NTopLeaves) {
1180  message(1, "local TopTree Size =%d >> expected = %d; Usually this indicates very bad imbalance, due to a giant density peak.\n",
1181  *topTreeSize, policy->NTopLeaves);
1182  }
1183 
1184 #if 0
1185  char buf[1000];
1186  sprintf(buf, "topnodes.bin.%d", ThisTask);
1187  FILE * fd = fopen(buf, "w");
1188 
1189  fwrite(topTree, sizeof(struct local_topnode_data), *topTreeSize, fd);
1190  fclose(fd);
1191 
1192  //MPI_Barrier(DomainComm);
1193  //MPI_Abort(DomainComm, 0);
1194 #endif
1195 
1196  /* we now need to exchange tree parts and combine them as needed */
1197  int combine_failed = domain_nonrecursively_combine_topTree(topTree, topTreeSize, MaxTopNodes, DomainComm);
1198 
1199  walltime_measure("/Domain/DetermineTopTree/Combine");
1200 
1201  if(combine_failed)
1202  {
1203  message(0, "can't combine trees due to lack of storage. Will try again.\n");
1204  return 1;
1205  }
1206 
1207  /* now let's see whether we still need more refinements, based on the estimated cumulative cost/count in each cell */
1208 
1209  int global_refine_failed = domain_global_refine(topTree, topTreeSize, MaxTopNodes, countlimit, costlimit);
1210 
1211  walltime_measure("/Domain/DetermineTopTree/Addnodes");
1212 
1213  if(MPIU_Any(global_refine_failed, DomainComm)) {
1214  message(0, "Global refine failed: toptreeSize = %d, MaxTopNodes = %d\n", *topTreeSize, MaxTopNodes);
1215  return 1;
1216  }
1217 
1218  message(0, "Final local topTree size = %d per segment = %g.\n", *topTreeSize, 1.0 * (*topTreeSize) / (policy->NTopLeaves));
1219 
1220  return 0;
1221 }
static int domain_check_for_local_refine_subsample(DomainDecompositionPolicy *policy, struct local_topnode_data *topTree, int *topTreeSize, const int MaxTopNodes)
Definition: domain.c:895
static void domain_toptree_truncate(struct local_topnode_data *topTree, int *topTreeSize, int64_t countlimit, int64_t costlimit)
Definition: domain.c:865
static int domain_nonrecursively_combine_topTree(struct local_topnode_data *topTree, int *topTreeSize, const int MaxTopNodes, MPI_Comm DomainComm)
Definition: domain.c:1056
static int domain_global_refine(struct local_topnode_data *topTree, int *topTreeSize, const int MaxTopNodes, int64_t countlimit, int64_t costlimit)
Definition: domain.c:1224
int ThisTask
Definition: test_exchange.c:23

References local_topnode_data::Cost, local_topnode_data::Count, domain_check_for_local_refine_subsample(), domain_global_refine(), domain_nonrecursively_combine_topTree(), domain_toptree_truncate(), message(), MPI_INT64, MPIU_Any(), DomainDecompositionPolicy::NTopLeaves, ThisTask, and walltime_measure.

Referenced by domain_attempt_decompose().


◆ domain_free()

void domain_free ( DomainDecomp ddecomp)

Definition at line 320 of file domain.c.

321 {
322  if(ddecomp->domain_allocated_flag)
323  {
324  myfree(ddecomp->TopLeaves);
325  myfree(ddecomp->TopNodes);
326  myfree(ddecomp->Tasks);
327  ddecomp->domain_allocated_flag = 0;
328  }
329 }

References DomainDecomp::domain_allocated_flag, myfree, DomainDecomp::Tasks, DomainDecomp::TopLeaves, and DomainDecomp::TopNodes.

Referenced by begrun(), do_force_test(), domain_decompose_full(), and run_gravity_test().


◆ domain_global_refine()

static int domain_global_refine ( struct local_topnode_data topTree,
int *  topTreeSize,
const int  MaxTopNodes,
int64_t  countlimit,
int64_t  costlimit 
)
static

Definition at line 1224 of file domain.c.

1227 {
1228  int i;
1229 
1230  /* At this point we have refined the local particle tree so that each
1231  * topNode contains a Cost and Count below the cost threshold. We have then
1232  * done a global merge of the particle tree. Some of our topTree may now contain
1233  * more particles than the Cost threshold, but repeating refinement using the local
1234  * algorithm is complicated - particles inside any particular topNode may be
1235  * on another processor. So we do a local volume based refinement here. This
1236  * just cuts each topNode above the threshold into 8 equal-sized portions by
1237  * subdividing the peano key.
1238  *
1239  * NOTE: Just like the merge, this does not correctly preserve costs!
1240  * Costs are just divided by 8, because recomputing them for the daughter nodes
1241  * will be expensive.
1242  * In practice this seems to work fine, probably because the cost distribution
1243  * is not that unbalanced. */
1244 
1245  message(0, "local topTree size before appending=%d\n", *topTreeSize);
1246 
1247  /*Note that *topTreeSize will change inside the loop*/
1248  for(i = 0; i < *topTreeSize; i++)
1249  {
1250  /*If this node has no children and non-zero size*/
1251  if(topTree[i].Daughter >= 0 || topTree[i].Shift <= 0) continue;
1252 
1253  /*If this node is also more costly than the limit*/
1254  if(topTree[i].Count < countlimit && topTree[i].Cost < costlimit) continue;
1255 
1256  /*If we have no space for another 8 topTree, exit */
1257  if((*topTreeSize + 8) > MaxTopNodes) {
1258  return 1;
1259  }
1260 
1261  topTree[i].Daughter = *topTreeSize;
1262  int j;
1263  for(j = 0; j < 8; j++)
1264  {
1265  int sub = topTree[i].Daughter + j;
1266  topTree[sub].Shift = topTree[i].Shift - 3;
1267  topTree[sub].Count = topTree[i].Count / 8;
1268  topTree[sub].Cost = topTree[i].Cost / 8;
1269  topTree[sub].Daughter = -1;
1270  topTree[sub].Parent = i;
1271  topTree[sub].StartKey = topTree[i].StartKey + j * (1L << topTree[sub].Shift);
1272  }
1273  (*topTreeSize) += 8;
1274  }
1275  return 0;
1276 }

References local_topnode_data::Cost, local_topnode_data::Count, local_topnode_data::Daughter, message(), local_topnode_data::Parent, local_topnode_data::Shift, and local_topnode_data::StartKey.

Referenced by domain_determine_global_toptree().


◆ domain_layoutfunc()

static int domain_layoutfunc ( int  n,
const void *  userdata 
)
static

This function determines which of the particles currently stored on the local CPU have to be moved off, according to the domain decomposition.

layoutfunc decides the target Task of particle p (used by subfind_distribute).

Definition at line 707 of file domain.c.

707  {
708  const DomainDecomp * ddecomp = (const DomainDecomp *) userdata;
709  peano_t key = P[n].Key;
710  int no = domain_get_topleaf(key, ddecomp);
711  return ddecomp->TopLeaves[no].Task;
712 }

References domain_get_topleaf(), P, topleaf_data::Task, and DomainDecomp::TopLeaves.

Referenced by domain_decompose_full(), and domain_maintain().


◆ domain_maintain()

void domain_maintain ( DomainDecomp ddecomp,
struct DriftData drift 
)

Definition at line 234 of file domain.c.

235 {
236  message(0, "Attempting a domain exchange\n");
237 
238  walltime_measure("/Misc");
239 
240  /* Try a domain exchange.
241  * If we have no memory for the particles,
242  * bail and do a full domain*/
243  if(0 != domain_exchange(domain_layoutfunc, ddecomp, 0, drift, PartManager, SlotsManager, 10000, ddecomp->DomainComm)) {
244  domain_decompose_full(ddecomp);
245  return;
246  }
247 }
void domain_decompose_full(DomainDecomp *ddecomp)
Definition: domain.c:155

References domain_decompose_full(), domain_exchange(), domain_layoutfunc(), DomainDecomp::DomainComm, message(), PartManager, SlotsManager, and walltime_measure.

Referenced by run().


◆ domain_nonrecursively_combine_topTree()

static int domain_nonrecursively_combine_topTree ( struct local_topnode_data topTree,
int *  topTreeSize,
const int  MaxTopNodes,
MPI_Comm  DomainComm 
)
static

Definition at line 1056 of file domain.c.

1057 {
1058  /*
1059  * Combine the topTree non-recursively; this uses MPI_Bcast within a group.
1060  * It shall be quite a bit faster (~2x) than the old recursive scheme.
1061  *
1062  * It takes less time at higher sep.
1063  *
1064  * The communication should have been done with an MPI inter-communicator,
1065  * but I couldn't figure out how to do it that way.
1066  * */
1067  int sep = 1;
1068  int errorflag = 0;
1069  /*Number of tasks to decompose to*/
1070  int NTask;
1071  MPI_Comm_size(DomainComm, &NTask);
1072  /*Which task is this?*/
1073  int ThisTask;
1074  MPI_Comm_rank(DomainComm, &ThisTask);
1075 
1076  for(sep = 1; sep < NTask; sep *=2) {
1077  /* build the subcommunicators for broadcasting */
1078  int Color = ThisTask / sep;
1079  int Key = ThisTask % sep;
1080 
1081  /* non leaders will skip exchanges */
1082  if(Key != 0)
1083  continue;
1084 
1085  /* leaders of even color will combine nodes from next odd color,
1086  * so that when sep is increased eventually rank 0 will have all
1087  * nodes */
1088  if(Color % 2 == 0) {
1089  /* even guys recv */
1090  int recvTask = ThisTask + sep;
1091  int ntopnodes_import = 0;
1092  if(recvTask < NTask) {
1093  MPI_Recv(&ntopnodes_import, 1, MPI_INT, recvTask, TAG_GRAV_A,
1094  DomainComm, MPI_STATUS_IGNORE);
1095  if(ntopnodes_import < 0) {
1096  endrun(1, "severe domain error using a unintended rank \n");
1097  }
1098  int mergesize = ntopnodes_import;
1099  if(ntopnodes_import < *topTreeSize)
1100  mergesize = *topTreeSize;
1101  struct local_topnode_data * topTree_import = (struct local_topnode_data *) mymalloc("topTree_import",
1102  mergesize * sizeof(struct local_topnode_data));
1103 
1104  MPI_Recv(topTree_import,
1105  ntopnodes_import * sizeof(struct local_topnode_data), MPI_BYTE,
1106  recvTask, TAG_GRAV_B,
1107  DomainComm, MPI_STATUS_IGNORE);
1108 
1109  if((*topTreeSize + ntopnodes_import) > MaxTopNodes) {
1110  errorflag = 1;
1111  } else {
1112  if(ntopnodes_import > 0 ) {
1113  domain_toptree_merge(topTree, topTree_import, 0, 0, topTreeSize, MaxTopNodes);
1114  }
1115  }
1116  myfree(topTree_import);
1117  }
1118  } else {
1119  /* odd guys send */
1120  int recvTask = ThisTask - sep;
1121  if(recvTask >= 0) {
1122  MPI_Send(topTreeSize, 1, MPI_INT, recvTask, TAG_GRAV_A, DomainComm);
1123  MPI_Send(topTree, (*topTreeSize) * sizeof(struct local_topnode_data), MPI_BYTE,
1124  recvTask, TAG_GRAV_B, DomainComm);
1125  }
1126  *topTreeSize = -1;
1127  }
1128  }
1129 
1130  MPI_Bcast(topTreeSize, 1, MPI_INT, 0, DomainComm);
1131  /* Check that the merge succeeded*/
1132  if(*topTreeSize < 0 || *topTreeSize >= MaxTopNodes) {
1133  errorflag = 1;
1134  }
1135  int errorflagall = MPIU_Any(errorflag, DomainComm);
1136  if(errorflagall == 0)
1137  MPI_Bcast(topTree, (*topTreeSize) * sizeof(struct local_topnode_data), MPI_BYTE, 0, DomainComm);
1138  return errorflagall;
1139 }
static void domain_toptree_merge(struct local_topnode_data *treeA, struct local_topnode_data *treeB, int noA, int noB, int *treeASize, const int MaxTopNodes)
Definition: domain.c:1353
#define TAG_GRAV_B
Definition: domain.c:24
#define TAG_GRAV_A
Definition: domain.c:23
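The doubling pattern in the loop above can be visualised with a small standalone program (NTask = 8 is assumed for illustration); it prints which rank ships its topTree to which leader at each value of sep. After the last round rank 0 holds the fully merged tree, which the final MPI_Bcast then distributes:

#include <stdio.h>

int main(void)
{
    int NTask = 8;    /* assumed task count */
    int sep;
    for(sep = 1; sep < NTask; sep *= 2) {
        int rank;
        for(rank = 0; rank < NTask; rank++) {
            if(rank % sep != 0)
                continue;              /* non-leaders skip the exchange */
            int Color = rank / sep;
            if(Color % 2 == 1 && rank - sep >= 0)
                printf("sep=%d: rank %d sends to rank %d\n", sep, rank, rank - sep);
        }
    }
    return 0;
}

For NTask = 8 this prints 1->0, 3->2, 5->4, 7->6 at sep=1; 2->0, 6->4 at sep=2; and 4->0 at sep=4.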

References domain_toptree_merge(), endrun(), local_particle_data::Key, MPIU_Any(), myfree, mymalloc, NTask, TAG_GRAV_A, TAG_GRAV_B, and ThisTask.

Referenced by domain_determine_global_toptree().


◆ domain_policies_init()

static int domain_policies_init ( DomainDecompositionPolicy  policies[],
const int  NincreaseAlloc,
const int  SwitchToGlobal 
)
static

Definition at line 252 of file domain.c.

255 {
256  int NTask;
257  MPI_Comm_size(MPI_COMM_WORLD, &NTask);
258 
259  int i;
260  for(i = 0; i < NincreaseAlloc; i ++) {
261  policies[i].TopNodeAllocFactor = domain_params.TopNodeAllocFactor * pow(1.3, i);
262  /* global sorting is slower than a local sorting, but tends to produce a more
263  * balanced domain tree that is easier to merge.
264  * */
265  policies[i].UseGlobalSort = domain_params.DomainUseGlobalSorting;
266  if(i >= SwitchToGlobal)
267  policies[i].UseGlobalSort = 1;
268  /* The extent of the global sorting may be different from the extent of the Domain communicator*/
269  policies[i].GlobalSortComm = MPI_COMM_WORLD;
270  /* global sorting of particles is slow, so we add a slower presort to even the local
271  * particle distribution before subsampling, improves the balance, too */
272  policies[i].PreSort = 0;
273  if(i >= 1)
274  policies[i].PreSort = 1;
275  policies[i].SubSampleDistance = 16;
276  /* Desired number of TopLeaves should scale like the total number of processors*/
277  policies[i].NTopLeaves = domain_params.DomainOverDecompositionFactor * NTask;
278  }
279 
280  return NincreaseAlloc;
281 }
int DomainOverDecompositionFactor
Definition: domain.h:53
int DomainUseGlobalSorting
Definition: domain.h:55
double TopNodeAllocFactor
Definition: domain.h:57

References domain_params, DomainParams::DomainOverDecompositionFactor, DomainParams::DomainUseGlobalSorting, DomainDecompositionPolicy::GlobalSortComm, NTask, DomainDecompositionPolicy::NTopLeaves, DomainDecompositionPolicy::PreSort, DomainDecompositionPolicy::SubSampleDistance, DomainDecompositionPolicy::TopNodeAllocFactor, DomainParams::TopNodeAllocFactor, and DomainDecompositionPolicy::UseGlobalSort.

Referenced by domain_decompose_full().


◆ domain_toptree_garbage_collection()

static void domain_toptree_garbage_collection ( struct local_topnode_data topTree,
int  start,
int *  last_free 
)
static

Definition at line 839 of file domain.c.

840 {
841 
842  if(topTree[start].Daughter == -1) return;
843  int j;
844 
845  int oldd = topTree[start].Daughter;
846  int newd = *last_free;
847 
848  topTree[start].Daughter = newd;
849 
850  (*last_free) += 8;
851 
852  /* copy first in case oldd + j is overwritten by the recursed gc
853  * if last_free is less than oldd */
854  for(j = 0; j < 8; j ++) {
855  topTree[newd + j] = topTree[oldd + j];
856  topTree[newd + j].Parent = start;
857  }
858 
859  for(j = 0; j < 8; j ++) {
860  domain_toptree_garbage_collection(topTree, newd + j, last_free);
861  }
862 }
static void domain_toptree_garbage_collection(struct local_topnode_data *topTree, int start, int *last_free)
Definition: domain.c:839

References local_topnode_data::Daughter, and local_topnode_data::Parent.

Referenced by domain_toptree_truncate().


◆ domain_toptree_get_subnode()

static int domain_toptree_get_subnode ( struct local_topnode_data topTree,
peano_t  key 
)
static

Definition at line 738 of file domain.c.

740 {
741  int no = 0;
742  while(topTree[no].Daughter >= 0) {
743  no = topTree[no].Daughter + ((key - topTree[no].StartKey) >> (topTree[no].Shift - 3));
744  }
745  return no;
746 }

References local_topnode_data::Daughter, and local_topnode_data::StartKey.

Referenced by domain_check_for_local_refine_subsample(), and domain_toptree_insert().


◆ domain_toptree_insert()

static int domain_toptree_insert ( struct local_topnode_data topTree,
peano_t  Key,
int64_t  cost 
)
static

Definition at line 749 of file domain.c.

751 {
752  /* insert */
753  int leaf = domain_toptree_get_subnode(topTree, Key);
754  topTree[leaf].Count ++;
755  topTree[leaf].Cost += cost;
756  return leaf;
757 }

References local_topnode_data::Cost, topleaf_extdata::cost, local_topnode_data::Count, domain_toptree_get_subnode(), and topleaf_extdata::Key.

Referenced by domain_check_for_local_refine_subsample().
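
During local refinement each insert is immediately followed by a fullness check; a hedged sketch of the pattern (the threshold name countlimit is illustrative, and the real loop lives in domain_check_for_local_refine_subsample()):

    int leaf = domain_toptree_insert(topTree, Key, cost);
    /* A nonzero return from the split means the topnode pool is exhausted;
     * the decomposition attempt is abandoned and retried with the next,
     * more generous, policy. */
    if(topTree[leaf].Count > countlimit) {
        if(domain_toptree_split(topTree, topTreeSize, MaxTopNodes, leaf))
            return 1;
    }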


◆ domain_toptree_merge()

static void domain_toptree_merge ( struct local_topnode_data *  treeA,
struct local_topnode_data *  treeB,
int  noA,
int  noB,
int *  treeASize,
const int  MaxTopNodes 
)
static

Merge treeB into treeA.

The function recursively merges the cost and refinement structure of treeB into treeA.

At the initial call, noA and noB must both be the root node.

When the structure is mismatched, e.g. a leaf in A meets a branch in B or a leaf in B meets a branch in A, the cost of the leaf is split evenly between the daughters. This is only an approximation when the leaf is not empty.

We therefore have an incentive to minimize overlaps between treeB and treeA. The local refinement step does a global sort of keys to help with that.

Definition at line 1353 of file domain.c.

1356 {
1357  int j, sub;
1358  int64_t count;
1359  int64_t cost;
1360 
1361  if(treeB[noB].Shift < treeA[noA].Shift)
1362  {
1363  /* Add B to A */
1364  /* Create a daughter to a, since we will merge B to A's daughter*/
1365  if(treeA[noA].Daughter < 0)
1366  {
1367  if((*treeASize + 8) >= MaxTopNodes) {
1368  endrun(88, "Too many Topnodes; this shall not happen because we ensure there is enough and bailed earlier than this\n");
1369  }
1370  /* noB must have a parent if we are here, since noB is lower than noA;
1371  * noA must have already merged in the cost of noB's parent.
1372  * This is the first time we create these children in A, thus,
1373  * We shall evenly divide the non-B part of the cost in these children;
1374  * */
1375 
1376  count = treeA[noA].Count - treeB[treeB[noB].Parent].Count;
1377  cost = treeA[noA].Cost - treeB[treeB[noB].Parent].Cost;
1378 
1379  treeA[noA].Daughter = *treeASize;
1380  for(j = 0; j < 8; j++)
1381  {
1382 
1383  sub = treeA[noA].Daughter + j;
1384  treeA[sub].Shift = treeA[noA].Shift - 3;
1385  treeA[sub].Count = (j + 1) * count / 8 - j * count / 8;
1386  treeA[sub].Cost = (j + 1) * cost / 8 - j * cost / 8;
1387  treeA[sub].Daughter = -1;
1388  treeA[sub].Parent = noA;
1389  treeA[sub].StartKey = treeA[noA].StartKey + j * (1L << treeA[sub].Shift);
1390  }
1391  (*treeASize) += 8;
1392  }
1393 
1394  /* find the sub node in A for me and merge, this would bring noB and sub on the same shift, drop to next case */
1395  sub = treeA[noA].Daughter + ((treeB[noB].StartKey - treeA[noA].StartKey) >> (treeA[noA].Shift - 3));
1396  domain_toptree_merge(treeA, treeB, sub, noB, treeASize, MaxTopNodes);
1397  }
1398  else if(treeB[noB].Shift == treeA[noA].Shift)
1399  {
1400  treeA[noA].Count += treeB[noB].Count;
1401  treeA[noA].Cost += treeB[noB].Cost;
1402 
1403  /* Prefer to go down B; this would trigger the previous case */
1404  if(treeB[noB].Daughter >= 0)
1405  {
1406  for(j = 0; j < 8; j++)
1407  {
1408  sub = treeB[noB].Daughter + j;
1409  if(treeB[sub].Shift >= treeB[noB].Shift) {
1410  endrun(1, "Child node %d has shift %d, parent %d has shift %d. treeB is corrupt. \n",
1411  sub, treeB[sub].Shift, noB, treeB[noB].Shift);
1412  }
1413  domain_toptree_merge(treeA, treeB, noA, sub, treeASize, MaxTopNodes);
1414  }
1415  }
1416  else
1417  {
1418  /* We can't divide by B so we do it for A, this may trigger the next branch, since
1419  * we are lowering A */
1420  if(treeA[noA].Daughter >= 0) {
1421  for(j = 0; j < 8; j++) {
1422  sub = treeA[noA].Daughter + j;
1423  domain_toptree_merge(treeA, treeB, sub, noB, treeASize, MaxTopNodes);
1424  }
1425  }
1426  }
1427  }
1428  else if(treeB[noB].Shift > treeA[noA].Shift)
1429  {
1430  /* Since we only know how to split A, here we simply add a spatial average to A */
1431  uint64_t n = 1L << (treeB[noB].Shift - treeA[noA].Shift);
1432 
1433  if(treeB[noB].Shift - treeA[noA].Shift > 60) {
1434  message(1, "Warning: Refusing to merge two tree nodes of wildly different depth: %d %d;\n ", treeB[noB].Shift, treeA[noA].Shift);
1435  n = 0;
1436  }
1437 
1438  count = treeB[noB].Count;
1439  cost = treeB[noB].Cost;
1440 
1441  if (n > 0) {
1442  /* this is no longer conserving total cost but it should be fine .. */
1443  treeA[noA].Count += count / n;
1444  treeA[noA].Cost += cost / n;
1445 
1446  // message(1, "adding cost to %d %td, %td\n", noA, count / n, cost / n);
1447  if(treeA[noA].Daughter >= 0) {
1448  for(j = 0; j < 8; j++) {
1449  sub = treeA[noA].Daughter + j;
1450  domain_toptree_merge(treeA, treeB, sub, noB, treeASize, MaxTopNodes);
1451  }
1452  }
1453  }
1454  }
1455 }

References local_topnode_data::Cost, local_topnode_data::Count, local_topnode_data::Daughter, endrun(), message(), local_topnode_data::Parent, local_topnode_data::Shift, and local_topnode_data::StartKey.

Referenced by domain_nonrecursively_combine_topTree().
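
Because both trees must be entered at their roots, folding a remote tree into the local one is a single call; a hedged sketch (the buffer name topTreeRecv is illustrative):

    /* noA = noB = 0 on the initial call: both roots. */
    domain_toptree_merge(topTree, topTreeRecv, 0, 0, &topTreeSize, MaxTopNodes);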


◆ domain_toptree_split()

static int domain_toptree_split ( struct local_topnode_data *  topTree,
int *  topTreeSize,
const int  MaxTopNodes,
int  i 
)
static

Definition at line 760 of file domain.c.

762 {
763  int j;
764  /* we ran out of top nodes and must get more.*/
765  if((*topTreeSize + 8) > MaxTopNodes)
766  return 1;
767 
768  if(topTree[i].Shift < 3) {
769  endrun(1, "Failed to build a TopTree -- particles overly clustered.\n");
770  }
771 
772  /*Make a new topnode section attached to this node*/
773  topTree[i].Daughter = *topTreeSize;
774  (*topTreeSize) += 8;
775 
776  /* Initialise this topnode with new sub nodes*/
777  for(j = 0; j < 8; j++)
778  {
779  const int sub = topTree[i].Daughter + j;
780  /* The new sub nodes have this node as parent
781  * and no daughters.*/
782  topTree[sub].Daughter = -1;
783  topTree[sub].Parent = i;
784  /* Shorten the peano key by a factor 8, reflecting the oct-tree level.*/
785  topTree[sub].Shift = topTree[i].Shift - 3;
786  /* This is the region of peanospace covered by this node.*/
787  topTree[sub].StartKey = topTree[i].StartKey + j * (1L << topTree[sub].Shift);
788  /* We will compute the cost in the node below.*/
789  topTree[sub].Count = 0;
790  topTree[sub].Cost = 0;
791  }
792  return 0;
793 }

References local_topnode_data::Cost, local_topnode_data::Count, local_topnode_data::Daughter, endrun(), local_topnode_data::Parent, local_topnode_data::Shift, and local_topnode_data::StartKey.

Referenced by domain_check_for_local_refine_subsample().


◆ domain_toptree_truncate()

static void domain_toptree_truncate ( struct local_topnode_data *  topTree,
int *  topTreeSize,
int64_t  countlimit,
int64_t  costlimit 
)
static

Definition at line 865 of file domain.c.

868 {
869 
870  /* first terminate the tree.*/
871  domain_toptree_truncate_r(topTree, 0, countlimit, costlimit);
872 
873  /* then remove the unused nodes from the topTree storage. This is important
874  * for efficient global merge */
875  *topTreeSize = 1; /* put in the root node -- it's never a garbage . */
876  domain_toptree_garbage_collection(topTree, 0, topTreeSize);
877 }

References domain_toptree_garbage_collection(), and domain_toptree_truncate_r().

Referenced by domain_determine_global_toptree().
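
A hedged sketch of how the two limits might be chosen before truncation; the per-leaf scaling shown here is illustrative, and the real values are computed in domain_determine_global_toptree():

    /* Prune until a leaf carries roughly a per-leaf share of the global
     * work, then compact the survivors ready for the global merge. */
    int64_t countlimit = TotCount / policy->NTopLeaves;
    int64_t costlimit = TotCost / policy->NTopLeaves;
    domain_toptree_truncate(topTree, &topTreeSize, countlimit, costlimit);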


◆ domain_toptree_truncate_r()

static void domain_toptree_truncate_r ( struct local_topnode_data *  topTree,
int  start,
int64_t  countlimit,
int64_t  costlimit 
)
static

Definition at line 811 of file domain.c.

812 {
813  if(topTree[start].Daughter == -1) return;
814 
815  if(topTree[start].Count < countlimit &&
816  topTree[start].Cost < costlimit) {
817  /* truncate here */
818  topTree[start].Daughter = -1;
819  return;
820  }
821 
822  int j;
823  for(j = 0; j < 8; j ++) {
824  int sub = topTree[start].Daughter + j;
825  domain_toptree_truncate_r(topTree, sub, countlimit, costlimit);
826  }
827 }

References local_topnode_data::Daughter.

Referenced by domain_toptree_truncate().


◆ domain_toptree_update_cost()

static void domain_toptree_update_cost ( struct local_topnode_data *  topTree,
int  start 
)
static

Definition at line 796 of file domain.c.

797 {
798  if(topTree[start].Daughter == -1) return;
799 
800  int j = 0;
801  for(j = 0; j < 8; j ++) {
802  int sub = topTree[start].Daughter + j;
803  domain_toptree_update_cost(topTree, sub);
804  topTree[start].Count += topTree[sub].Count;
805  topTree[start].Cost += topTree[sub].Cost;
806  }
807 }

References local_topnode_data::Cost, local_topnode_data::Count, and local_topnode_data::Daughter.

Referenced by domain_check_for_local_refine_subsample().


◆ mp_order_by_key()

static void mp_order_by_key ( const void *  data,
void *  radix,
void *  arg 
)
static

Definition at line 1472 of file domain.c.

1473 {
1474  const struct local_particle_data * pa = (const struct local_particle_data *) data;
1475  ((uint64_t *) radix)[0] = pa->Key;
1476 }

References local_particle_data::Key.

Referenced by domain_check_for_local_refine_subsample().
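
mp_order_by_key() is the radix callback for the distributed sort; a hedged sketch of how it would be handed to mpsort (the array and count names are illustrative):

    /* Sort the sampled particles by their 8-byte Peano key across every
     * rank of the sort communicator; NULL is the unused closure argument. */
    mpsort_mpi(LP, Nsample, sizeof(struct local_particle_data),
               mp_order_by_key, sizeof(uint64_t), NULL, policy->GlobalSortComm);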


◆ order_by_key()

static int order_by_key ( const void *  a,
const void *  b 
)
static

Definition at line 1458 of file domain.c.

1459 {
1460  const struct local_particle_data * pa = (const struct local_particle_data *) a;
1461  const struct local_particle_data * pb = (const struct local_particle_data *) b;
1462  if(pa->Key < pb->Key)
1463  return -1;
1464 
1465  if(pa->Key > pb->Key)
1466  return +1;
1467 
1468  return 0;
1469 }

References local_particle_data::Key.

Referenced by domain_check_for_local_refine_subsample().
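
order_by_key() is the serial counterpart, used when a policy sorts only locally; a minimal usage sketch (array and count names illustrative):

    /* Rank-local sort of the sampled keys. */
    qsort(LP, Nsample, sizeof(struct local_particle_data), order_by_key);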


◆ set_domain_par()

void set_domain_par ( DomainParams  dp)

Definition at line 78 of file domain.c.

79 {
80  domain_params = dp;
81 }

References domain_params.

Referenced by setup_tree(), and test_fof().


◆ set_domain_params()

void set_domain_params ( ParameterSet *  ps)

Definition at line 84 of file domain.c.

85 {
86  int ThisTask;
87  MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
88  if(ThisTask == 0) {
89  domain_params.DomainOverDecompositionFactor = param_get_int(ps, "DomainOverDecompositionFactor");
90  /* Create one domain per thread. This helps the balance and makes the treebuild merge faster*/
91  if(domain_params.DomainOverDecompositionFactor < 0)
92  domain_params.DomainOverDecompositionFactor = omp_get_max_threads();
93  if(domain_params.DomainOverDecompositionFactor == 0)
94  domain_params.DomainOverDecompositionFactor = 1;
95  domain_params.TopNodeAllocFactor = param_get_double(ps, "TopNodeAllocFactor");
96  domain_params.DomainUseGlobalSorting = param_get_int(ps, "DomainUseGlobalSorting");
97  domain_params.SetAsideFactor = 1.;
98  if((param_get_int(ps, "StarformationOn") && param_get_double(ps, "QuickLymanAlphaProbability") == 0.)
99  || param_get_int(ps, "BlackHoleOn"))
100  domain_params.SetAsideFactor = 0.95;
101  }
102  MPI_Bcast(&domain_params, sizeof(DomainParams), MPI_BYTE, 0, MPI_COMM_WORLD);
103 }

References domain_params, DomainParams::DomainOverDecompositionFactor, DomainParams::DomainUseGlobalSorting, param_get_double(), param_get_int(), DomainParams::SetAsideFactor, ThisTask, and DomainParams::TopNodeAllocFactor.

Referenced by read_parameter_file().
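
Reading on rank 0 and then broadcasting the raw bytes works because DomainParams is plain data with no pointer members; a minimal sketch of the same idiom for a hypothetical struct:

    #include <mpi.h>

    struct Example { int a; double b; };

    void broadcast_example(struct Example * ex)
    {
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        if(rank == 0) {       /* only rank 0 fills in the values... */
            ex->a = 1;
            ex->b = 2.0;
        }
        /* ...every other rank receives a byte-for-byte copy. */
        MPI_Bcast(ex, sizeof(*ex), MPI_BYTE, 0, MPI_COMM_WORLD);
    }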


◆ topleaf_ext_order_by_key()

static int topleaf_ext_order_by_key ( const void *  c1,
const void *  c2 
)
static

Definition at line 510 of file domain.c.

511 {
512  const struct topleaf_extdata * p1 = (const struct topleaf_extdata *) c1;
513  const struct topleaf_extdata * p2 = (const struct topleaf_extdata *) c2;
514  if(p1->Key < p2->Key) return -1;
515  if(p1->Key > p2->Key) return 1;
516  return 0;
517 }

References topleaf_extdata::Key.

Referenced by domain_assign_balanced().


◆ topleaf_ext_order_by_task_and_key()

static int topleaf_ext_order_by_task_and_key ( const void *  c1,
const void *  c2 
)
static

Definition at line 498 of file domain.c.

499 {
500  const struct topleaf_extdata * p1 = (const struct topleaf_extdata *) c1;
501  const struct topleaf_extdata * p2 = (const struct topleaf_extdata *) c2;
502  if(p1->Task < p2->Task) return -1;
503  if(p1->Task > p2->Task) return 1;
504  if(p1->Key < p2->Key) return -1;
505  if(p1->Key > p2->Key) return 1;
506  return 0;
507 }

References topleaf_extdata::Key, and topleaf_extdata::Task.

Referenced by domain_assign_balanced().
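
A hedged sketch of the two-pass ordering this pair of comparators suggests for domain_assign_balanced() (array name illustrative): sort by key to walk the Peano curve while handing out contiguous, cost-balanced segments to tasks, then re-sort by (task, key) so each task's leaves end up contiguous and key-ordered:

    qsort(TopLeafExt, NTopLeaves, sizeof(struct topleaf_extdata),
          topleaf_ext_order_by_key);
    /* ...assign TopLeafExt[i].Task by sweeping cost along the curve... */
    qsort(TopLeafExt, NTopLeaves, sizeof(struct topleaf_extdata),
          topleaf_ext_order_by_task_and_key);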


Variable Documentation

◆ domain_params

DomainParams domain_params
static