Cyclops Tensor Framework
parallel arithmetic on multidimensional arrays
world.cxx
Go to the documentation of this file.
1 /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
2 
3 #include "common.h"
4 #include "world.h"
5 #include "../shared/lapack_symbs.h"
6 #include "../tensor/algstrct.h"
7 #include "../shared/util.h"
8 #include "../shared/memcontrol.h"
9 #include "../shared/offload.h"
10 
/**
 * \brief empty function exported with C linkage (unmangled symbol name)
 *
 * NOTE(review): presumably exists so that build systems can probe whether
 * the CTF library links; confirm against the build scripts.
 */
extern "C" void CTF_linked() {}
15 
16 using namespace CTF_int;
17 
18 namespace CTF_int {
19  bool grid_wrapper::operator<(grid_wrapper const & other) const {
20  if (this->pr == other.pr)
21  return this->pc < other.pc;
22  else
23  return this->pr < other.pr;
24  }
25 
27  std::set<grid_wrapper> scalapack_grids;
28 }
29 
30 namespace CTF {
31  bool universe_exists = false;
32  World universe("");
33 
34  World::World(int argc,
35  char * const * argv){
36  comm = MPI_COMM_WORLD;
37 #ifdef BGQ
38  this->init(comm, TOPOLOGY_BGQ, argc, argv);
39 #else
40 #ifdef BGP
41  this->init(comm, TOPOLOGY_BGP, argc, argv);
42 #else
43  this->init(comm, TOPOLOGY_GENERIC, argc, argv);
44 #endif
45 #endif
46  }
47 
48 
49  World::World(MPI_Comm comm_,
50  int argc,
51  char * const * argv){
52  comm = comm_;
53 #ifdef BGQ
54  this->init(comm, TOPOLOGY_BGQ, argc, argv);
55 #else
56 #ifdef BGP
57  this->init(comm, TOPOLOGY_BGP, argc, argv);
58 #else
59  this->init(comm, TOPOLOGY_GENERIC, argc, argv);
60 #endif
61 #endif
62  }
63 
64 
65  World::World(int order,
66  int const * lens,
67  MPI_Comm comm_,
68  int argc,
69  char * const * argv){
70  comm = comm_;
71  this->init(comm, order, lens, argc, argv);
72  }
73 
74  World::World(World const & other){
75  comm = other.comm;
76 #if DEBUG >= 1
77  if (other.rank == 0){
78  printf("CTF WARNING: Creating copy of World, which is not free or useful, pass original World by reference instead if possible.\n");
79  }
80 #endif
81  //ASSERT(0);
82  this->init(comm, other.phys_topology->order, other.phys_topology->lens, 0, NULL);
83 /* cdt = other.cdt;
84  rank = other.rank;
85  np = other.np;
86  initialized = other.initialized;
87 
88  ASSERT(0);
89  for (int i=0; i<(int)other.topovec.size(); i++){
90  topovec.push_back(other.topovec[i]);
91  }*/
92  }
93 
94  World::World(char const * emptystring){}
95 
97  if (!is_copy && this != &universe){
98  for (int i=0; i<(int)topovec.size(); i++){
99  delete topovec[i];
100  }
101  delete phys_topology;
102  if (this->cdt.cm == MPI_COMM_WORLD){
103  ASSERT(universe_exists);
104  universe_exists = false;
105  }
106  topovec.clear();
107  }
108 
109  initialized = 0;
110  mem_exit(rank);
111  if (get_num_instances() == 0){
112  for (std::set<grid_wrapper>::iterator it=scalapack_grids.begin(); it!=scalapack_grids.end(); it++){
113  //printf("HERE %d %d %d\n",it->pr,it->pc,it->ctxt);
115  }
116  scalapack_grids.clear();
117 #ifdef OFFLOAD
118  offload_exit();
119 #endif
120 #ifdef HPM
121  HPM_Stop("CTF");
122 #endif
123  TAU_FSTOP(CTF);
124  }
125 
126  }
127 
128 
129  int World::init(MPI_Comm const global_context,
130  TOPOLOGY mach,
131  int argc,
132  const char * const * argv){
133  cdt = CommData(comm);
134  if (mach == TOPOLOGY_GENERIC)
135  phys_topology = NULL;
136  else
137  phys_topology = get_phys_topo(cdt, mach);
138 
139  return initialize(argc, argv);
140  }
141 
142  int World::init(MPI_Comm const global_context,
143  int order,
144  int const * dim_len,
145  int argc,
146  const char * const * argv){
147 
148  cdt = CommData(global_context);
149  phys_topology = new topology(order, dim_len, cdt, 1);
150 
151  return initialize(argc, argv);
152  }
153 
154 
155  int World::initialize(int argc,
156  const char * const * argv){
157  char * mst_size, * stack_size, * mem_size, * ppn;
158  if (comm == MPI_COMM_WORLD && universe_exists){
159  delete phys_topology;
160  *this = universe;
161  is_copy = true;
162  } else {
163  is_copy = false;
164  glob_wrld_rng.seed(CTF_int::get_num_instances());
165  MPI_Comm_rank(comm, &rank);
166  MPI_Comm_size(comm, &np);
167  if (phys_topology == NULL){
168  phys_topology = get_phys_topo(cdt, TOPOLOGY_GENERIC);
169  topovec = get_generic_topovec(cdt);
170 /* std::vector<topology*> topovec2;
171  topovec2 = peel_perm_torus(get_phys_topo(cdt, TOPOLOGY_GENERIC), cdt);
172  printf("topovec size is %ld, via old method was %ld\n",topovec.size(), topovec2.size());*/
173  } else
174  topovec = peel_perm_torus(phys_topology, cdt);
175  }
177  if (CTF_int::get_num_instances() == 1){
178  TAU_FSTART(CTF);
179  #ifdef HPM
180  HPM_Start("CTF");
181  #endif
182  #ifdef OFFLOAD
183  offload_init();
184  #endif
185  int all_np;
186  MPI_Comm_size(MPI_COMM_WORLD, &all_np);
187  if (all_np != np){
188  if (rank == 0){
189  printf("CTF ERROR: the first CTF instance created has to be on MPI_COMM_WORLD\n");
190  fflush(stdout);
191  }
192  MPI_Barrier(comm);
193  IASSERT(0);
194  }
195  init_rng(rank);
196 
197  CTF::set_context(cdt.cm);
198  CTF::set_main_args(argc, argv);
199 
200  #ifdef USE_OMP
201  char * ntd = getenv("OMP_NUM_THREADS");
202  if (ntd == NULL){
203  omp_set_num_threads(1);
204  if (rank == 0){
205  VPRINTF(1,"Running with 1 thread using omp_set_num_threads(1), because OMP_NUM_THREADS is not defined\n");
206  }
207  } else {
208  if (rank == 0 && ntd != NULL){
209  VPRINTF(1,"Running with %d threads\n",omp_get_max_threads());
210  }
211  }
212  #endif
213  // Get the environment variable FILE_PATH
214  char * file_path = getenv("CTF_MODEL_FILE");
215  if (file_path != NULL && strcmp(file_path,"")!=0){
216  VPRINTF(1,"Reading model coefficients from file %s (CTF_MODEL_FILE)\n", file_path);
217  std::string coeff_file;
218  coeff_file = std::string(file_path);
219  CTF_int::load_all_models(coeff_file);
220  }
221 
222  mst_size = getenv("CTF_MST_SIZE");
223  stack_size = getenv("CTF_STACK_SIZE");
224  if (mst_size == NULL && stack_size == NULL){
225  #if 0 //def USE_MST
226  if (rank == 0)
227  VPRINTF(1,"Creating stack of size %ld\n",1000*(int64_t)1E6);
228  CTF_int::mst_create(1000*(int64_t)1E6);
229  #else
230  if (rank == 0){
231 // VPRINTF(1,"Running without stack, define CTF_STACK_SIZE environment variable to activate stack\n");
232  }
233  #endif
234  } else {
235 #if 0
236  int64_t imst_size = 0 ;
237  if (mst_size != NULL)
238  imst_size = strtoull(mst_size,NULL,0);
239  if (stack_size != NULL)
240  imst_size = MAX(imst_size,strtoull(stack_size,NULL,0));
241  if (rank == 0)
242  printf("Creating stack of size %ld due to CTF_STACK_SIZE enviroment variable\n",
243  imst_size);
244  CTF_int::mst_create(imst_size);
245 #endif
246  }
247  mem_size = getenv("CTF_MEMORY_SIZE");
248  if (mem_size != NULL){
249  int64_t imem_size = strtoull(mem_size,NULL,0);
250  if (rank == 0)
251  VPRINTF(1,"Memory size set to %ld by CTF_MEMORY_SIZE environment variable\n",
252  imem_size);
253  CTF_int::set_mem_size(imem_size);
254  }
255  ppn = getenv("CTF_PPN");
256  if (ppn != NULL){
257  if (rank == 0)
258  printf("Assuming %d processes per node due to CTF_PPN environment variable\n",
259  atoi(ppn));
260  ASSERT(atoi(ppn)>=1);
261  #ifdef BGQ
262  CTF_int::set_memcap(.75);
263  #else
264  CTF_int::set_memcap(.75/atof(ppn));
265  #endif
266  }
267  if (rank == 0)
268  VPRINTF(1,"Total amount of memory available to process 0 is %ld\n", proc_bytes_available());
269  }
270  initialized = 1;
271  if (comm == MPI_COMM_WORLD){
272  if (!universe_exists){
273  universe_exists = true;
274  universe = *this;
275  }
276  }
277  return CTF_int::SUCCESS;
278  }
279 
280 /*
281  void World::contract_mst(){
282  std::list<mem_transfer> tfs = CTF_int::contract_mst();
283  if (tfs.size() > 0 && get_global_comm().rank == 0){
284  DPRINTF(1,"CTF Warning: contracting memory stack\n");
285  }
286  std::list<mem_transfer>::iterator it;
287  int i;
288  int j = 0;
289  for (it=tfs.begin(); it!=tfs.end(); it++){
290  j++;
291  for (i=0; i<(int)tensors.size(); i++){
292  if (tensors[i]->data == (dtype*)it->old_ptr){
293  tensors[i]->data = (dtype*)it->new_ptr;
294  break;
295  }
296  }
297  if (i == (int)tensors.size()){
298  printf("CTF ERROR: pointer %d on mst is not tensor data, aborting\n",j);
299  ASSERT(0);
300  }
301  for (i=0; i<(int)tensors.size(); i++){
302  if (tensors[i]->data == (dtype*)it->old_ptr){
303  tensors[i]->data = (dtype*)it->new_ptr;
304  }
305  }
306 
307  }*/
308 
310  if (!universe_exists){
311  World * pscp_universe = new World();
312  pscp_universe->is_copy=true;
313  delete pscp_universe;
314  }
315  return universe;
316  }
317 
318 }
void load_all_models(std::string file_name)
Definition: model.cxx:34
int64_t mem_size
Definition: memcontrol.cxx:72
void set_main_args(int argc, const char *const *argv)
Definition: int_timer.cxx:309
MPI_Comm comm
Definition: int_timer.cxx:22
def rank(self)
Definition: core.pyx:312
void mst_create(int64_t size)
initializes stack buffer
Definition: memcontrol.cxx:170
#define ASSERT(...)
Definition: util.h:88
void CTF_linked()
Definition: world.cxx:13
an instance of the CTF library (world) on a MPI communicator
Definition: world.h:19
void mem_exit(int rank)
exit instance of memory manager
Definition: memcontrol.cxx:207
#define IASSERT(...)
Definition: common.h:74
void init_rng(int rank)
initializes random number generator
Definition: common.cxx:23
string
Definition: core.pyx:456
#define MAX(a, b)
Definition: util.h:180
#define VPRINTF(...)
Definition: util.h:207
std::vector< topology * > peel_perm_torus(topology *phys_topology, CommData cdt)
folds specified topology and all of its permutations into all configurations of lesser dimensionality...
Definition: topology.cxx:488
int rank
rank of local processor
Definition: world.h:24
CTF::World World
Definition: back_comp.h:7
bool is_copy
Definition: world.h:97
void set_context(MPI_Comm ctxt)
Definition: int_timer.cxx:314
std::set< grid_wrapper > scalapack_grids
index for ScaLAPACK processor grids
Definition: world.cxx:27
CTF_int::topology * phys_topology
main torus topology corresponding to the world
Definition: world.h:34
topology * get_phys_topo(CommData glb_comm, TOPOLOGY mach)
get dimension and torus lengths of specified topology
Definition: topology.cxx:94
void offload_exit()
exit offloading, e.g. destroy cublas
World universe("")
#define TAU_FSTOP(ARG)
Definition: util.h:281
#define TAU_FSTART(ARG)
Definition: util.h:280
World & get_universe()
Definition: world.cxx:309
void cblacs_gridexit(int contxt)
void set_memcap(double cap)
sets what fraction of the memory capacity CTF can use
Definition: memcontrol.cxx:118
~World()
frees CTF library
Definition: world.cxx:96
void mem_create()
create instance of memory manager
Definition: memcontrol.cxx:187
int64_t proc_bytes_available()
gives total memory available on this MPI process
Definition: memcontrol.cxx:655
Definition: apsp.cxx:17
World(int argc, char *const *argv)
creates CTF library on comm that can output profile data into a file with a name based on the main ar...
Definition: world.cxx:34
std::vector< topology * > get_generic_topovec(CommData cdt)
computes all topology configurations given underlying physical topology information ...
Definition: topology.cxx:449
bool universe_exists
Definition: world.cxx:31
void offload_init()
initialize offloading, e.g. create cublas
bool operator<(grid_wrapper const &other) const
Definition: world.cxx:19
void set_mem_size(int64_t size)
sets what fraction of the memory capacity CTF can use
Definition: memcontrol.cxx:110
TOPOLOGY
Definition: topology.h:10
int get_num_instances()
Definition: memcontrol.cxx:531
MPI_Comm comm
set of processors making up this world
Definition: world.h:22
def np(self)
Definition: core.pyx:315