MP-Gadget  5.0.1.dev1-76bc7d4726-dirty
Classes | Typedefs | Functions
openmpsort.c File Reference
#include <omp.h>
#include <stdlib.h>
#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include <stdint.h>
#include "mymalloc.h"
Include dependency graph for openmpsort.c:

Go to the source code of this file.

Classes

struct  msort_param
 

Typedefs

typedef int(* __compar_fn_t) (const void *, const void *)
 

Functions

static void msort_with_tmp (const struct msort_param *p, void *b, size_t n)
 
static void merge (void *base1, size_t nmemb1, void *base2, size_t nmemb2, void *output, size_t size, int(*compar)(const void *, const void *), int indirect)
 
void qsort_openmp (void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *))
 

Typedef Documentation

◆ __compar_fn_t

typedef int(* __compar_fn_t) (const void *, const void *)

Definition at line 36 of file openmpsort.c.

Function Documentation

◆ merge()

static void merge ( void *  base1,
size_t  nmemb1,
void *  base2,
size_t  nmemb2,
void *  output,
size_t  size,
int(*)(const void *, const void *)  compar,
int  indirect 
)
static

Definition at line 174 of file openmpsort.c.

175  {
176  char * p1 = (char *) base1;
177  char * p2 = (char *) base2;
178  char * po = (char *) output;
179  char * s1 = p1 + nmemb1 * size, *s2 = p2 + nmemb2 * size;
180  while(p1 < s1 && p2 < s2) {
181  int cmp;
182  if(indirect) {
183  cmp = compar(*(void **)p1, *(void **)p2);
184  }
185  else {
186  cmp = compar(p1, p2);
187  }
188  if(cmp <= 0) {
189  memcpy(po, p1, size);
190  p1 += size;
191  } else {
192  memcpy(po, p2, size);
193  p2 += size;
194  }
195  po += size;
196  }
197  if(p1 < s1) {
198  memcpy(po, p1, s1 - p1);
199  }
200  if(p2 < s2) {
201  memcpy(po, p2, s2 - p2);
202  }
203 }

Referenced by qsort_openmp().

Here is the caller graph for this function:

◆ msort_with_tmp()

static void msort_with_tmp ( const struct msort_param p,
void *  b,
size_t  n 
)
static

Definition at line 49 of file openmpsort.c.

50 {
51  char *b1, *b2;
52  size_t n1, n2;
53 
54  if (n <= 1)
55  return;
56 
57  n1 = n / 2;
58  n2 = n - n1;
59  b1 = (char *) b;
60  b2 = (char *) b + (n1 * p->s);
61 
62  msort_with_tmp (p, b1, n1);
63  msort_with_tmp (p, b2, n2);
64 
65  char *tmp = p->t;
66  const size_t s = p->s;
67  __compar_fn_t cmp = p->cmp;
68  switch (p->var)
69  {
70  case 0:
71  while (n1 > 0 && n2 > 0)
72  {
73  if ((*cmp) (b1, b2) <= 0)
74  {
75  *(uint32_t *) tmp = *(uint32_t *) b1;
76  b1 += sizeof (uint32_t);
77  --n1;
78  }
79  else
80  {
81  *(uint32_t *) tmp = *(uint32_t *) b2;
82  b2 += sizeof (uint32_t);
83  --n2;
84  }
85  tmp += sizeof (uint32_t);
86  }
87  break;
88  case 1:
89  while (n1 > 0 && n2 > 0)
90  {
91  if ((*cmp) (b1, b2) <= 0)
92  {
93  *(uint64_t *) tmp = *(uint64_t *) b1;
94  b1 += sizeof (uint64_t);
95  --n1;
96  }
97  else
98  {
99  *(uint64_t *) tmp = *(uint64_t *) b2;
100  b2 += sizeof (uint64_t);
101  --n2;
102  }
103  tmp += sizeof (uint64_t);
104  }
105  break;
106  case 2:
107  while (n1 > 0 && n2 > 0)
108  {
109  unsigned long *tmpl = (unsigned long *) tmp;
110  unsigned long *bl;
111 
112  tmp += s;
113  if ((*cmp) (b1, b2) <= 0)
114  {
115  bl = (unsigned long *) b1;
116  b1 += s;
117  --n1;
118  }
119  else
120  {
121  bl = (unsigned long *) b2;
122  b2 += s;
123  --n2;
124  }
125  while (tmpl < (unsigned long *) tmp)
126  *tmpl++ = *bl++;
127  }
128  break;
129  case 3:
130  while (n1 > 0 && n2 > 0)
131  {
132  if ((*cmp) (*(const void **) b1, *(const void **) b2) <= 0)
133  {
134  *(void **) tmp = *(void **) b1;
135  b1 += sizeof (void *);
136  --n1;
137  }
138  else
139  {
140  *(void **) tmp = *(void **) b2;
141  b2 += sizeof (void *);
142  --n2;
143  }
144  tmp += sizeof (void *);
145  }
146  break;
147  default:
148  while (n1 > 0 && n2 > 0)
149  {
150  if ((*cmp) (b1, b2) <= 0)
151  {
152  tmp = (char *) memcpy (tmp, b1, s) + s;
153  b1 += s;
154  --n1;
155  }
156  else
157  {
158  tmp = (char *) memcpy (tmp, b2, s) + s;
159  b2 += s;
160  --n2;
161  }
162  }
163  break;
164  }
165 
166  if (n1 > 0)
167  memcpy (tmp, b1, n1 * s);
168  memcpy (b, p->t, (n - n2) * s);
169 }
int(* __compar_fn_t)(const void *, const void *)
Definition: openmpsort.c:36
static void msort_with_tmp(const struct msort_param *p, void *b, size_t n)
Definition: openmpsort.c:49
char * t
Definition: openmpsort.c:44
__compar_fn_t cmp
Definition: openmpsort.c:42
size_t s
Definition: openmpsort.c:40
size_t var
Definition: openmpsort.c:41

References msort_param::cmp, msort_param::s, msort_param::t, and msort_param::var.

Referenced by qsort_openmp().

Here is the caller graph for this function:

◆ qsort_openmp()

void qsort_openmp ( void *  base,
size_t  nmemb,
size_t  size,
int(*)(const void *, const void *)  compar 
)

Definition at line 205 of file openmpsort.c.

206  {
207  int Nt = omp_get_max_threads();
208  ptrdiff_t * Anmemb = ta_malloc("Anmemb", ptrdiff_t, Nt);
209  ptrdiff_t * Anmemb_old = ta_malloc("Anmemb_old", ptrdiff_t, Nt);
210 
211  void ** Abase_store = ta_malloc("Abase", void *, Nt);
212  void ** Atmp_store = ta_malloc("Atmp", void *, Nt);
213  void ** Abase = Abase_store;
214  void ** Atmp = Atmp_store;
215 
216  /*Should I use indirect sorting?*/
217  int indirect = 0;
218  if(size > 32)
219  indirect = 1;
220  void * tmp;
221  /*NOTE: if this allocation becomes a problem,
222  * switch to glibc's quicksort (serial!) */
223  if(indirect)
224  tmp = mymalloc("qsort",2*nmemb*sizeof(void *) + size);
225  else
226  tmp = mymalloc("qsort", size * nmemb);
227 
228 #pragma omp parallel
229  {
230  int tid = omp_get_thread_num();
231  /* actual number of threads*/
232  int Nt = omp_get_num_threads();
233 
234  /* domain decomposition */
235  ptrdiff_t start = tid * nmemb / Nt;
236  ptrdiff_t end = (tid + 1) * nmemb / Nt;
237  /* save decoposition to global array for merging */
238  Anmemb[tid] = end - start;
239  Anmemb_old[tid] = end - start;
240  Abase[tid] = ((char*) base) + start * size;
241  Atmp[tid] = ((char*) tmp) + start * size;
242 
243  struct msort_param p;
244  p.t = ((char **) Atmp)[tid];
245  p.s = size;
246  p.var = 4;
247  p.cmp = compar;
248  /*For large arrays we use an indirect sort following glibc*/
249  if(indirect)
250  {
251  /* Indirect sorting: copy everything in this thread to the new pointer space,
252  * which is after the tmp space. */
253  char *ip = (char *) Abase[tid];
254  void **tp = (void **) ((char *)tmp + (nmemb + start)* sizeof (void *));
255  void **t = tp;
256  void *end = (void *) (tp + Anmemb[tid]);
257 
258  while ((void *) t < end)
259  {
260  *t++ = ip;
261  ip += size;
262  }
263  p.s = sizeof (void *);
264  p.var = 3;
265  Abase[tid] = tp;
266  Atmp[tid] = ((char*) tmp) + start * p.s;
267  p.t = ((char **)Atmp)[tid];
268  }
269  else {
270  /*Copied from glibc*/
271  if ((size & (sizeof (uint32_t) - 1)) == 0
272  && ((char *) base - (char *) 0) % __alignof__ (uint32_t) == 0)
273  {
274  if (size == sizeof (uint32_t))
275  p.var = 0;
276  else if (size == sizeof (uint64_t)
277  && ((char *) base - (char *) 0) % __alignof__ (uint64_t) == 0)
278  p.var = 1;
279  else if ((size & (sizeof (unsigned long) - 1)) == 0
280  && ((char *) base - (char *) 0)
281  % __alignof__ (unsigned long) == 0)
282  p.var = 2;
283  }
284  /*End copied from glibc*/
285  }
286  msort_with_tmp (&p, Abase[tid], Anmemb[tid]);
287  /* now each sub array is sorted, kick start the merging */
288  int sep;
289  for (sep = 1; sep < Nt; sep *=2 ) {
290  int color = tid / sep;
291  int key = tid % sep;
292 #if 0
293  if(tid == 0) {
294  printf("sep = %d Abase[0] %p base %p, Atmp[0] %p tmp %p\n",
295  sep, Abase[0], base, Atmp[0], tmp);
296  printf("base 73134 = %d 73135 = %d\n", ((int*) base)[73134], ((int*) base)[73135]);
297  printf("tmp 73134 = %d 73135 = %d\n", ((int*) tmp )[73134], ((int*) tmp )[73135]);
298  }
299 #endif
300 #pragma omp barrier
301  /* only group leaders work */
302  if(key == 0 && color % 2 == 0) {
303  int nextT = tid + sep;
304  /*merge with next guy */
305  /* only even leaders arrives to this point*/
306  if(nextT >= Nt) {
307  /* no next guy, copy directly.*/
308  merge(Abase[tid], Anmemb[tid], NULL, 0, Atmp[tid], p.s, compar, indirect);
309  } else {
310 #if 0
311  printf("%d + %d merging with %td/%td:%td %td/%td:%td\n", tid, nextT,
312  ((char*)Abase[tid] - (char*) base) / size,
313  ((char*)Abase[tid] - (char*) tmp) / size,
314  Anmemb[tid],
315  ((char*)Abase[nextT] - (char*) base) / size,
316  ((char*)Abase[nextT] - (char*) tmp) / size,
317  Anmemb[nextT]);
318 #endif
319  merge(Abase[tid], Anmemb[tid], Abase[nextT], Anmemb[nextT], Atmp[tid], p.s, compar, indirect);
320  /* merge two lists */
321  Anmemb[tid] = Anmemb[tid] + Anmemb[nextT];
322  Anmemb[nextT] = 0;
323  }
324  }
325 
326  /* now swap Abase and Atmp for next merge */
327 #pragma omp barrier
328  if(tid == 0) {
329  void ** a = Abase;
330  Abase = Atmp;
331  Atmp = a;
332  }
333  /* at this point Abase contains the sorted array */
334  }
335 #pragma omp barrier
336  /* output was written to the tmp rather than desired location, copy it */
337  if((!indirect && Abase[0] != base)
338  || (indirect && Abase[0] != (char *) tmp + nmemb * sizeof(void *))) {
339  memmove(Atmp[tid], Abase[tid], Anmemb_old[tid] * p.s);
340  }
341  }
342 
343  ta_free(Atmp_store);
344  ta_free(Abase_store);
345  ta_free(Anmemb_old);
346  ta_free(Anmemb);
347 
348  /*Copied from glibc*/
349  /* tp[0] .. tp[n - 1] is now sorted, copy around entries of
350  the original array (done serially). Knuth vol. 3 (2nd ed.) exercise 5.2-10. */
351  if(indirect) {
352  char *kp, *ip;
353  size_t i;
354  void **tp = (void **) ((char *) tmp + nmemb * sizeof (void *));
355  void *tmp_storage = (void *) (tp + nmemb);
356  for (i = 0, ip = (char *) base; i < nmemb; i++, ip += size)
357  if ((kp = ((char **)tp)[i]) != ip)
358  {
359  size_t j = i;
360  char *jp = ip;
361  memcpy (tmp_storage, ip, size);
362 
363  do {
364  size_t k = (kp - (char *) base) / size;
365  tp[j] = jp;
366  memcpy (jp, kp, size);
367  j = k;
368  jp = kp;
369  kp = ((char **)tp)[k];
370  } while (kp != ip);
371 
372  tp[j] = jp;
373  memcpy (jp, tmp_storage, size);
374  }
375  }
376  myfree(tmp);
377 }
#define mymalloc(name, size)
Definition: mymalloc.h:15
#define ta_malloc(name, type, nele)
Definition: mymalloc.h:25
#define ta_free(p)
Definition: mymalloc.h:28
#define myfree(x)
Definition: mymalloc.h:19
static void merge(void *base1, size_t nmemb1, void *base2, size_t nmemb2, void *output, size_t size, int(*compar)(const void *, const void *), int indirect)
Definition: openmpsort.c:174

References msort_param::cmp, merge(), msort_with_tmp(), myfree, mymalloc, msort_param::s, msort_param::t, ta_free, ta_malloc, and msort_param::var.

Here is the call graph for this function: