Tpetra parallel linear algebra  Version of the Day
Tpetra_DirectoryImpl_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef __Tpetra_DirectoryImpl_def_hpp
43 #define __Tpetra_DirectoryImpl_def_hpp
44 
47 
48 #include "Tpetra_Distributor.hpp"
49 #include "Tpetra_Map.hpp"
50 #include "Tpetra_TieBreak.hpp"
51 #include "Tpetra_Details_FixedHashTable.hpp"
52 #include "Teuchos_Comm.hpp"
53 #include <memory>
54 #include <sstream>
55 
56 // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
57 #ifdef HAVE_TPETRACORE_MPI
58 # include <mpi.h>
59 #endif // HAVE_TPETRACORE_MPI
60 // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
61 
62 namespace Tpetra {
63  namespace Details {
64  template<class LO, class GO, class NT>
67  getEntries (const map_type& map,
68  const Teuchos::ArrayView<const GO> &globalIDs,
69  const Teuchos::ArrayView<int> &nodeIDs,
70  const Teuchos::ArrayView<LO> &localIDs,
71  const bool computeLIDs) const
72  {
73  // Ensure that globalIDs, nodeIDs, and localIDs (if applicable)
74  // all have the same size, before modifying any output arguments.
75  TEUCHOS_TEST_FOR_EXCEPTION(nodeIDs.size() != globalIDs.size(),
76  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
77  "Output arrays do not have the right sizes. nodeIDs.size() = "
78  << nodeIDs.size() << " != globalIDs.size() = " << globalIDs.size()
79  << ".");
80  TEUCHOS_TEST_FOR_EXCEPTION(
81  computeLIDs && localIDs.size() != globalIDs.size(),
82  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
83  "Output array do not have the right sizes. localIDs.size() = "
84  << localIDs.size() << " != globalIDs.size() = " << globalIDs.size()
85  << ".");
86 
87  // Initially, fill nodeIDs and localIDs (if applicable) with
88  // invalid values. The "invalid" process ID is -1 (this means
89  // the same thing as MPI_ANY_SOURCE to Teuchos, so it's an
90  // "invalid" process ID); the invalid local ID comes from
91  // OrdinalTraits.
92  std::fill (nodeIDs.begin(), nodeIDs.end(), -1);
93  if (computeLIDs) {
94  std::fill (localIDs.begin(), localIDs.end(),
95  Teuchos::OrdinalTraits<LO>::invalid ());
96  }
97  // Actually do the work.
98  return this->getEntriesImpl (map, globalIDs, nodeIDs, localIDs, computeLIDs);
99  }
100 
101 
102  template<class LO, class GO, class NT>
104  ReplicatedDirectory (const map_type& map) :
105  numProcs_ (map.getComm ()->getSize ())
106  {}
107 
108 
109  template<class LO, class GO, class NT>
110  bool
112  isOneToOne (const Teuchos::Comm<int>& /* comm */) const
113  {
114  // A locally replicated Map is one-to-one only if there is no
115  // replication, that is, only if the Map's communicator only has
116  // one process.
117  return (numProcs_ == 1);
118  }
119 
120 
121  template<class LO, class GO, class NT>
122  std::string
124  {
125  std::ostringstream os;
126  os << "ReplicatedDirectory"
127  << "<" << Teuchos::TypeNameTraits<LO>::name ()
128  << ", " << Teuchos::TypeNameTraits<GO>::name ()
129  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
130  return os.str ();
131  }
132 
133 
134  template<class LO, class GO, class NT>
136  ContiguousUniformDirectory (const map_type& map)
137  {
138  TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
139  Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
140  TEUCHOS_TEST_FOR_EXCEPTION(! map.isUniform (), std::invalid_argument,
141  Teuchos::typeName (*this) << " constructor: Map is not uniform.");
142  }
143 
144 
145  template<class LO, class GO, class NT>
146  std::string
148  {
149  std::ostringstream os;
150  os << "ContiguousUniformDirectory"
151  << "<" << Teuchos::TypeNameTraits<LO>::name ()
152  << ", " << Teuchos::TypeNameTraits<GO>::name ()
153  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
154  return os.str ();
155  }
156 
157 
158  template<class LO, class GO, class NT>
161  getEntriesImpl (const map_type& map,
162  const Teuchos::ArrayView<const GO> &globalIDs,
163  const Teuchos::ArrayView<int> &nodeIDs,
164  const Teuchos::ArrayView<LO> &localIDs,
165  const bool computeLIDs) const
166  {
167  using Teuchos::Comm;
168  using Teuchos::RCP;
169  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
170  const LO invalidLid = Teuchos::OrdinalTraits<LO>::invalid ();
172 
173  RCP<const Comm<int> > comm = map.getComm ();
174  const GO g_min = map.getMinAllGlobalIndex ();
175 
176  // Let N_G be the global number of elements in the Map,
177  // and P be the number of processes in its communicator.
178  // Then, N_G = P * N_L + R = R*(N_L + 1) + (P - R)*N_L.
179  //
180  // The first R processes own N_L+1 elements.
181  // The remaining P-R processes own N_L elements.
182  //
183  // Let g be the current GID, g_min be the global minimum GID,
184  // and g_0 = g - g_min. If g is a valid GID in this Map, then
185  // g_0 is in [0, N_G - 1].
186  //
187  // If g is a valid GID in this Map and g_0 < R*(N_L + 1), then
188  // the rank of the process that owns g is floor(g_0 / (N_L +
189  // 1)), and its corresponding local index on that process is g_0
190  // mod (N_L + 1).
191  //
192  // Let g_R = g_0 - R*(N_L + 1). If g is a valid GID in this Map
193  // and g_0 >= R*(N_L + 1), then the rank of the process that
194  // owns g is then R + floor(g_R / N_L), and its corresponding
195  // local index on that process is g_R mod N_L.
196 
197  const size_type N_G =
198  static_cast<size_type> (map.getGlobalNumElements ());
199  const size_type P = static_cast<size_type> (comm->getSize ());
200  const size_type N_L = N_G / P;
201  const size_type R = N_G - N_L * P; // N_G mod P
202  const size_type N_R = R * (N_L + static_cast<size_type> (1));
203 
204 #ifdef HAVE_TPETRA_DEBUG
205  TEUCHOS_TEST_FOR_EXCEPTION(
206  N_G != P*N_L + R, std::logic_error,
207  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
208  "N_G = " << N_G << " != P*N_L + R = " << P << "*" << N_L << " + " << R
209  << " = " << P*N_L + R << ". "
210  "Please report this bug to the Tpetra developers.");
211 #endif // HAVE_TPETRA_DEBUG
212 
213  const size_type numGids = globalIDs.size (); // for const loop bound
214  // Avoid signed/unsigned comparisons below, in case GO is
215  // unsigned. (Integer literals are generally signed.)
216  const GO ONE = static_cast<GO> (1);
217 
218  if (computeLIDs) {
219  for (size_type k = 0; k < numGids; ++k) {
220  const GO g_0 = globalIDs[k] - g_min;
221 
222  // The first test is a little strange just in case GO is
223  // unsigned. Compilers raise a warning on tests like "x <
224  // 0" if x is unsigned, but don't usually raise a warning if
225  // the expression is a bit more complicated than that.
226  if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
227  nodeIDs[k] = -1;
228  localIDs[k] = invalidLid;
229  res = IDNotPresent;
230  }
231  else if (g_0 < static_cast<GO> (N_R)) {
232  // The GID comes from the initial sequence of R processes.
233  nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
234  localIDs[k] = static_cast<LO> (g_0 % static_cast<GO> (N_L + 1));
235  }
236  else if (g_0 >= static_cast<GO> (N_R)) {
237  // The GID comes from the remaining P-R processes.
238  const GO g_R = g_0 - static_cast<GO> (N_R);
239  nodeIDs[k] = static_cast<int> (R + g_R / N_L);
240  localIDs[k] = static_cast<int> (g_R % N_L);
241  }
242 #ifdef HAVE_TPETRA_DEBUG
243  else {
244  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
245  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
246  "should never get here. "
247  "Please report this bug to the Tpetra developers.");
248  }
249 #endif // HAVE_TPETRA_DEBUG
250  }
251  }
252  else { // don't compute local indices
253  for (size_type k = 0; k < numGids; ++k) {
254  const GO g_0 = globalIDs[k] - g_min;
255  // The first test is a little strange just in case GO is
256  // unsigned. Compilers raise a warning on tests like "x <
257  // 0" if x is unsigned, but don't usually raise a warning if
258  // the expression is a bit more complicated than that.
259  if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
260  nodeIDs[k] = -1;
261  res = IDNotPresent;
262  }
263  else if (g_0 < static_cast<GO> (N_R)) {
264  // The GID comes from the initial sequence of R processes.
265  nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
266  }
267  else if (g_0 >= static_cast<GO> (N_R)) {
268  // The GID comes from the remaining P-R processes.
269  const GO g_R = g_0 - static_cast<GO> (N_R);
270  nodeIDs[k] = static_cast<int> (R + g_R / N_L);
271  }
272 #ifdef HAVE_TPETRA_DEBUG
273  else {
274  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
275  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
276  "should never get here. "
277  "Please report this bug to the Tpetra developers.");
278  }
279 #endif // HAVE_TPETRA_DEBUG
280  }
281  }
282  return res;
283  }
284 
285  template<class LO, class GO, class NT>
287  DistributedContiguousDirectory (const map_type& map)
288  {
289  using Teuchos::arcp;
290  using Teuchos::gatherAll;
291  using Teuchos::RCP;
292 
293  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
294 
295  TEUCHOS_TEST_FOR_EXCEPTION(! map.isDistributed (), std::invalid_argument,
296  Teuchos::typeName (*this) << " constructor: Map is not distributed.");
297  TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
298  Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
299 
300  const int numProcs = comm->getSize ();
301 
302  // Make room for the min global ID on each process, plus one
303  // entry at the end for the "max cap."
304  allMinGIDs_ = arcp<GO> (numProcs + 1);
305  // Get my process' min global ID.
306  GO minMyGID = map.getMinGlobalIndex ();
307  // Gather all of the min global IDs into the first numProcs
308  // entries of allMinGIDs_.
309 
310  // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
311  //
312  // The purpose of this giant hack is that gatherAll appears to
313  // interpret the "receive count" argument differently than
314  // MPI_Allgather does. Matt Bettencourt reports Valgrind issues
315  // (memcpy with overlapping data) with MpiComm<int>::gatherAll,
316  // which could relate either to this, or to OpenMPI.
317 #ifdef HAVE_TPETRACORE_MPI
318  MPI_Datatype rawMpiType = MPI_INT;
319  bool useRawMpi = true;
320  if (typeid (GO) == typeid (int)) {
321  rawMpiType = MPI_INT;
322  } else if (typeid (GO) == typeid (long)) {
323  rawMpiType = MPI_LONG;
324  } else {
325  useRawMpi = false;
326  }
327  if (useRawMpi) {
328  using Teuchos::rcp_dynamic_cast;
329  using Teuchos::MpiComm;
330  RCP<const MpiComm<int> > mpiComm =
331  rcp_dynamic_cast<const MpiComm<int> > (comm);
332  // It could be a SerialComm instead, even in an MPI build, so
333  // be sure to check.
334  if (! comm.is_null ()) {
335  MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
336  const int err =
337  MPI_Allgather (&minMyGID, 1, rawMpiType,
338  allMinGIDs_.getRawPtr (), 1, rawMpiType,
339  rawMpiComm);
340  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
341  "Tpetra::DistributedContiguousDirectory: MPI_Allgather failed");
342  } else {
343  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
344  }
345  } else {
346  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
347  }
348 #else // NOT HAVE_TPETRACORE_MPI
349  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
350 #endif // HAVE_TPETRACORE_MPI
351  // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
352 
353  //gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
354 
355  // Put the max cap at the end. Adding one lets us write loops
356  // over the global IDs with the usual strict less-than bound.
357  allMinGIDs_[numProcs] = map.getMaxAllGlobalIndex ()
358  + Teuchos::OrdinalTraits<GO>::one ();
359  }
360 
361  template<class LO, class GO, class NT>
362  std::string
364  {
365  std::ostringstream os;
366  os << "DistributedContiguousDirectory"
367  << "<" << Teuchos::TypeNameTraits<LO>::name ()
368  << ", " << Teuchos::TypeNameTraits<GO>::name ()
369  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
370  return os.str ();
371  }
372 
373  template<class LO, class GO, class NT>
376  getEntriesImpl (const map_type& map,
377  const Teuchos::ArrayView<const GO> &globalIDs,
378  const Teuchos::ArrayView<int> &nodeIDs,
379  const Teuchos::ArrayView<LO> &localIDs,
380  const bool computeLIDs) const
381  {
382  using Teuchos::Array;
383  using Teuchos::ArrayRCP;
384  using Teuchos::ArrayView;
385  using Teuchos::as;
386  using Teuchos::Comm;
387  using Teuchos::RCP;
388 
390  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
391  const int myRank = comm->getRank ();
392 
393  // Map is on one process or is locally replicated.
394  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
395  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
396  typename ArrayView<const GO>::iterator gidIter;
397  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
398  if (map.isNodeGlobalElement (*gidIter)) {
399  *procIter++ = myRank;
400  if (computeLIDs) {
401  *lidIter++ = map.getLocalElement (*gidIter);
402  }
403  }
404  else {
405  // Advance the pointers, leaving these values set to invalid
406  procIter++;
407  if (computeLIDs) {
408  lidIter++;
409  }
410  res = IDNotPresent;
411  }
412  }
413  return res;
414  }
415 
416  template<class LO, class GO, class NT>
419  getEntriesImpl (const map_type& map,
420  const Teuchos::ArrayView<const GO> &globalIDs,
421  const Teuchos::ArrayView<int> &nodeIDs,
422  const Teuchos::ArrayView<LO> &localIDs,
423  const bool computeLIDs) const
424  {
425  using Teuchos::Array;
426  using Teuchos::ArrayRCP;
427  using Teuchos::ArrayView;
428  using Teuchos::as;
429  using Teuchos::Comm;
430  using Teuchos::RCP;
431 
432  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
433  const int numProcs = comm->getSize ();
434  const global_size_t nOverP = map.getGlobalNumElements () / numProcs;
435  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
437 
438  // Map is distributed but contiguous.
439  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
440  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
441  typename ArrayView<const GO>::iterator gidIter;
442  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
443  LO LID = LINVALID; // Assume not found until proven otherwise
444  int image = -1;
445  GO GID = *gidIter;
446  // Guess uniform distribution and start a little above it
447  // TODO: replace by a binary search
448  int curRank;
449  { // We go through all this trouble to avoid overflow and
450  // signed / unsigned casting mistakes (that were made in
451  // previous versions of this code).
452  const GO one = as<GO> (1);
453  const GO two = as<GO> (2);
454  const GO nOverP_GID = as<GO> (nOverP);
455  const GO lowerBound = GID / std::max(nOverP_GID, one) + two;
456  curRank = as<int>(std::min(lowerBound, as<GO>(numProcs - 1)));
457  }
458  bool found = false;
459  while (curRank >= 0 && curRank < numProcs) {
460  if (allMinGIDs_[curRank] <= GID) {
461  if (GID < allMinGIDs_[curRank + 1]) {
462  found = true;
463  break;
464  }
465  else {
466  curRank++;
467  }
468  }
469  else {
470  curRank--;
471  }
472  }
473  if (found) {
474  image = curRank;
475  LID = as<LO> (GID - allMinGIDs_[image]);
476  }
477  else {
478  res = IDNotPresent;
479  }
480  *procIter++ = image;
481  if (computeLIDs) {
482  *lidIter++ = LID;
483  }
484  }
485  return res;
486  }
487 
488  template<class LO, class GO, class NT>
490  DistributedNoncontiguousDirectory (const map_type& map) :
491  oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
492  locallyOneToOne_ (true), // to be revised below
493  useHashTables_ (false) // to be revised below
494  {
495  initialize (map, Teuchos::null);
496  }
497 
498  template<class LO, class GO, class NT>
499  DistributedNoncontiguousDirectory<LO, GO, NT>::
500  DistributedNoncontiguousDirectory (const map_type& map,
501  const tie_break_type& tie_break) :
502  oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
503  locallyOneToOne_ (true), // to be revised below
504  useHashTables_ (false) // to be revised below
505  {
506  initialize (map, Teuchos::ptrFromRef (tie_break));
507  }
508 
509  template<class LO, class GO, class NT>
510  void
511  DistributedNoncontiguousDirectory<LO, GO, NT>::
512  initialize (const map_type& map,
513  Teuchos::Ptr<const tie_break_type> tie_break)
514  {
515  using Teuchos::arcp;
516  using Teuchos::Array;
517  using Teuchos::ArrayRCP;
518  using Teuchos::ArrayView;
519  using Teuchos::as;
520  using Teuchos::RCP;
521  using Teuchos::rcp;
522  using Teuchos::typeName;
523  using Teuchos::TypeNameTraits;
524  using std::cerr;
525  using std::endl;
526  typedef Array<int>::size_type size_type;
527 
528  // This class' implementation of getEntriesImpl() currently
529  // encodes the following assumptions:
530  //
531  // 1. global_size_t >= GO
532  // 2. global_size_t >= int
533  // 3. global_size_t >= LO
534  //
535  // We check these assumptions here.
536  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(GO),
537  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
538  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Global"
539  "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(GO)
540  << ".");
541  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(int),
542  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
543  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(int) = "
544  << sizeof(int) << ".");
545  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(LO),
546  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
547  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Local"
548  "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(LO)
549  << ".");
550 
551  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
552  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid ();
553  const GO minAllGID = map.getMinAllGlobalIndex ();
554  const GO maxAllGID = map.getMaxAllGlobalIndex ();
555 
556  // The "Directory Map" (see below) will have a range of elements
557  // from the minimum to the maximum GID of the user Map, and a
558  // minimum GID of minAllGID from the user Map. It doesn't
559  // actually have to store all those entries, though do beware of
560  // calling getNodeElementList on it (see Bug 5822).
561  const global_size_t numGlobalEntries = maxAllGID - minAllGID + 1;
562 
563  // We can't afford to replicate the whole directory on each
564  // process, so create the "Directory Map", a uniform contiguous
565  // Map that describes how we will distribute the directory over
566  // processes.
567  //
568  // FIXME (mfh 08 May 2012) Here we're setting minAllGID to be
569  // the index base. The index base should be separate from the
570  // minimum GID.
571  directoryMap_ = rcp (new map_type (numGlobalEntries, minAllGID, comm,
572  GloballyDistributed));
573  // The number of Directory elements that my process owns.
574  const size_t dir_numMyEntries = directoryMap_->getNodeNumElements ();
575 
576  // Fix for Bug 5822: If the input Map is "sparse," that is if
577  // the difference between the global min and global max GID is
578  // much larger than the global number of elements in the input
579  // Map, then it's possible that the Directory Map might have
580  // many more entries than the input Map on this process. This
581  // can cause memory scalability issues. In that case, we switch
582  // from the array-based implementation of Directory storage to
583  // the hash table - based implementation. We don't use hash
584  // tables all the time, because they are slower in the common
585  // case of a nonsparse Map.
586  //
587  // NOTE: This is a per-process decision. Some processes may use
588  // array-based storage, whereas others may use hash table -
589  // based storage.
590 
591  // A hash table takes a constant factor more space, more or
592  // less, than an array. Thus, it's not worthwhile, even in
593  // terms of memory usage, always to use a hash table.
594  // Furthermore, array lookups are faster than hash table
595  // lookups, so it may be worthwhile to use an array even if it
596  // takes more space. The "sparsity threshold" governs when to
597  // switch to a hash table - based implementation.
598  const size_t inverseSparsityThreshold = 10;
599  useHashTables_ =
600  (dir_numMyEntries >= inverseSparsityThreshold * map.getNodeNumElements());
601 
602  // Get list of process IDs that own the directory entries for the
603  // Map GIDs. These will be the targets of the sends that the
604  // Distributor will do.
605  const int myRank = comm->getRank ();
606  const size_t numMyEntries = map.getNodeNumElements ();
607  Array<int> sendImageIDs (numMyEntries);
608  ArrayView<const GO> myGlobalEntries = map.getNodeElementList ();
609  // An ID not present in this lookup indicates that it lies outside
610  // of the range [minAllGID,maxAllGID] (from map_). this means
611  // something is wrong with map_, our fault.
612  const LookupStatus lookupStatus =
613  directoryMap_->getRemoteIndexList (myGlobalEntries, sendImageIDs);
614  TEUCHOS_TEST_FOR_EXCEPTION(
615  lookupStatus == IDNotPresent, std::logic_error, Teuchos::typeName(*this)
616  << " constructor: the Directory Map could not find out where one or "
617  "more of my Map's indices should go. The input to getRemoteIndexList "
618  "is " << Teuchos::toString (myGlobalEntries) << ", and the output is "
619  << Teuchos::toString (sendImageIDs ()) << ". The input Map itself has "
620  "the following entries on the calling process " <<
621  map.getComm ()->getRank () << ": " <<
622  Teuchos::toString (map.getNodeElementList ()) << ", and has "
623  << map.getGlobalNumElements () << " total global indices in ["
624  << map.getMinAllGlobalIndex () << "," << map.getMaxAllGlobalIndex ()
625  << "]. The Directory Map has "
626  << directoryMap_->getGlobalNumElements () << " total global indices in "
627  "[" << directoryMap_->getMinAllGlobalIndex () << "," <<
628  directoryMap_->getMaxAllGlobalIndex () << "], and the calling process "
629  "has GIDs [" << directoryMap_->getMinGlobalIndex () << "," <<
630  directoryMap_->getMaxGlobalIndex () << "]. "
631  "This probably means there is a bug in Map or Directory. "
632  "Please report this bug to the Tpetra developers.");
633 
634  // Initialize the distributor using the list of process IDs to
635  // which to send. We'll use the distributor to send out triples
636  // of (GID, process ID, LID). We're sending the entries to the
637  // processes that the Directory Map says should own them, which is
638  // why we called directoryMap_->getRemoteIndexList() above.
639  Distributor distor (comm);
640  const size_t numReceives = distor.createFromSends (sendImageIDs);
641 
642  // NOTE (mfh 21 Mar 2012) The following code assumes that
643  // sizeof(GO) >= sizeof(int) and sizeof(GO) >= sizeof(LO).
644  //
645  // Create and fill buffer of (GID, PID, LID) triples to send
646  // out. We pack the (GID, PID, LID) triples into a single Array
647  // of GO, casting the PID from int to GO and the LID from LO to
648  // GO as we do so.
649  //
650  // FIXME (mfh 23 Mar 2014) This assumes that sizeof(LO) <=
651  // sizeof(GO) and sizeof(int) <= sizeof(GO). The former is
652  // required, and the latter is generally the case, but we should
653  // still check for this.
654  const int packetSize = 3; // We're sending triples, so packet size is 3.
655  Array<GO> exportEntries (packetSize * numMyEntries); // data to send out
656  {
657  size_type exportIndex = 0;
658  for (size_type i = 0; i < static_cast<size_type> (numMyEntries); ++i) {
659  exportEntries[exportIndex++] = myGlobalEntries[i];
660  exportEntries[exportIndex++] = as<GO> (myRank);
661  exportEntries[exportIndex++] = as<GO> (i);
662  }
663  }
664  // Buffer of data to receive. The Distributor figured out for
665  // us how many packets we're receiving, when we called its
666  // createFromSends() method to set up the distribution plan.
667  Array<GO> importElements (packetSize * distor.getTotalReceiveLength ());
668 
669  // Distribute the triples of (GID, process ID, LID).
670  distor.doPostsAndWaits (exportEntries ().getConst (), packetSize, importElements ());
671 
672  // Unpack the redistributed data. Both implementations of
673  // Directory storage map from an LID in the Directory Map (which
674  // is the LID of the GID to store) to either a PID or an LID in
675  // the input Map. Each "packet" (contiguous chunk of
676  // importElements) contains a triple: (GID, PID, LID).
677  if (useHashTables_) {
678  // Create the hash tables. We know exactly how many elements
679  // to expect in each hash table. FixedHashTable's constructor
680  // currently requires all the keys and values at once, so we
681  // have to extract them in temporary arrays. It may be
682  // possible to rewrite FixedHashTable to use a "start fill" /
683  // "end fill" approach that avoids the temporary arrays, but
684  // we won't try that for now.
685 
686  // The constructors of Array and ArrayRCP that take a number
687  // of elements all initialize the arrays. Instead, allocate
688  // raw arrays, then hand them off to ArrayRCP, to avoid the
689  // initial unnecessary initialization without losing the
690  // benefit of exception safety (and bounds checking, in a
691  // debug build).
692  LO* tableKeysRaw = NULL;
693  LO* tableLidsRaw = NULL;
694  int* tablePidsRaw = NULL;
695  try {
696  tableKeysRaw = new LO [numReceives];
697  tableLidsRaw = new LO [numReceives];
698  tablePidsRaw = new int [numReceives];
699  } catch (...) {
700  if (tableKeysRaw != NULL) {
701  delete [] tableKeysRaw;
702  }
703  if (tableLidsRaw != NULL) {
704  delete [] tableLidsRaw;
705  }
706  if (tablePidsRaw != NULL) {
707  delete [] tablePidsRaw;
708  }
709  throw;
710  }
711  ArrayRCP<LO> tableKeys (tableKeysRaw, 0, numReceives, true);
712  ArrayRCP<LO> tableLids (tableLidsRaw, 0, numReceives, true);
713  ArrayRCP<int> tablePids (tablePidsRaw, 0, numReceives, true);
714 
715  if (tie_break.is_null ()) {
716  // Fill the temporary arrays of keys and values.
717  size_type importIndex = 0;
718  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
719  const GO curGID = importElements[importIndex++];
720  const LO curLID = directoryMap_->getLocalElement (curGID);
721  TEUCHOS_TEST_FOR_EXCEPTION(
722  curLID == LINVALID, std::logic_error,
723  Teuchos::typeName(*this) << " constructor: Incoming global index "
724  << curGID << " does not have a corresponding local index in the "
725  "Directory Map. Please report this bug to the Tpetra developers.");
726  tableKeys[i] = curLID;
727  tablePids[i] = importElements[importIndex++];
728  tableLids[i] = importElements[importIndex++];
729  }
730  // Set up the hash tables. The hash tables' constructor
731  // detects whether there are duplicates, so that we can set
732  // locallyOneToOne_.
733  typedef Kokkos::Device<typename NT::execution_space,
734  typename NT::memory_space> DT;
735  lidToPidTable_ =
736  rcp (new Details::FixedHashTable<LO, int, DT> (tableKeys (),
737  tablePids ()));
738  locallyOneToOne_ = ! (lidToPidTable_->hasDuplicateKeys ());
739  lidToLidTable_ =
740  rcp (new Details::FixedHashTable<LO, LO, DT> (tableKeys (),
741  tableLids ()));
742  }
743  else { // tie_break is NOT null
744 
745  // For each directory Map LID received, collect all the
746  // corresponding (PID,LID) pairs. If the input Map is not
747  // one-to-one, corresponding directory Map LIDs will have
748  // more than one pair. In that case, we will use the
749  // TieBreak object to pick exactly one pair.
750  typedef std::map<LO, std::vector<std::pair<int, LO> > > pair_table_type;
751  pair_table_type ownedPidLidPairs;
752 
753  // For each directory Map LID received, collect the zero or
754  // more input Map (PID,LID) pairs into ownedPidLidPairs.
755  size_type importIndex = 0;
756  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
757  const GO curGID = importElements[importIndex++];
758  const LO dirMapLid = directoryMap_->getLocalElement (curGID);
759  TEUCHOS_TEST_FOR_EXCEPTION(
760  dirMapLid == LINVALID, std::logic_error,
761  Teuchos::typeName(*this) << " constructor: Incoming global index "
762  << curGID << " does not have a corresponding local index in the "
763  "Directory Map. Please report this bug to the Tpetra developers.");
764  tableKeys[i] = dirMapLid;
765  const int PID = importElements[importIndex++];
766  const int LID = importElements[importIndex++];
767 
768  // These may change below. We fill them in just to ensure
769  // that they won't have invalid values.
770  tablePids[i] = PID;
771  tableLids[i] = LID;
772 
773  // For every directory Map LID, we have to remember all
774  // (PID, LID) pairs. The TieBreak object will arbitrate
775  // between them in the loop below.
776  ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
777  }
778 
779  // Use TieBreak to arbitrate between (PID,LID) pairs
780  // corresponding to each directory Map LID.
781  //
782  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
783  // as the directory Map LID?
784  // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
785  // contiguous, but the directory map is. Need to set tablePids[i]
786  // and tableLids[i], so need to loop over numReceives (as that is
787  // how those arrays are allocated). FIXED
788 
789  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
790  const LO dirMapLid = tableKeys[i];
791  const std::vector<std::pair<int, LO> >& pidLidList =
792  ownedPidLidPairs[dirMapLid];
793  const size_t listLen = pidLidList.size();
794  if (listLen == 0) continue; // KDD This will never happen
795  const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
796  if (listLen > 1) {
797  locallyOneToOne_ = false;
798  }
799  // If there is some (PID,LID) pair for the current input
800  // Map LID, then it makes sense to invoke the TieBreak
801  // object to arbitrate between the options. Even if
802  // there is only one (PID,LID) pair, we still want to
803  // give the TieBreak object a chance to do whatever it
804  // likes to do, in terms of side effects (e.g., track
805  // (PID,LID) pairs).
806  const size_type index =
807  static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
808  pidLidList));
809  tablePids[i] = pidLidList[index].first;
810  tableLids[i] = pidLidList[index].second;
811  }
812 
813  // Set up the hash tables.
814  typedef Kokkos::Device<typename NT::execution_space,
815  typename NT::memory_space> DT;
816  lidToPidTable_ =
817  rcp (new Details::FixedHashTable<LO, int, DT> (tableKeys (),
818  tablePids ()));
819  lidToLidTable_ =
820  rcp (new Details::FixedHashTable<LO, LO, DT> (tableKeys (),
821  tableLids ()));
822  }
823  }
824  else {
825  if (tie_break.is_null ()) {
826  // Use array-based implementation of Directory storage.
827  // Allocate these arrays and fill them with invalid values,
828  // in case the input Map's GID list is sparse (i.e., does
829  // not populate all GIDs from minAllGID to maxAllGID).
830  PIDs_ = arcp<int> (dir_numMyEntries);
831  std::fill (PIDs_.begin (), PIDs_.end (), -1);
832  LIDs_ = arcp<LO> (dir_numMyEntries);
833  std::fill (LIDs_.begin (), LIDs_.end (), LINVALID);
834  // Fill in the arrays with PIDs resp. LIDs.
835  size_type importIndex = 0;
836  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
837  const GO curGID = importElements[importIndex++];
838  const LO curLID = directoryMap_->getLocalElement (curGID);
839  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
840  Teuchos::typeName(*this) << " constructor: Incoming global index "
841  << curGID << " does not have a corresponding local index in the "
842  "Directory Map. Please report this bug to the Tpetra developers.");
843 
844  // If PIDs_[curLID] is not -1, then curGID is a duplicate
845  // on the calling process, so the Directory is not locally
846  // one-to-one.
847  if (PIDs_[curLID] != -1) {
848  locallyOneToOne_ = false;
849  }
850  PIDs_[curLID] = importElements[importIndex++];
851  LIDs_[curLID] = importElements[importIndex++];
852  }
853  }
854  else {
855  PIDs_ = arcp<int> (dir_numMyEntries);
856  LIDs_ = arcp<LO> (dir_numMyEntries);
857  std::fill (PIDs_.begin (), PIDs_.end (), -1);
858 
859  // All received (PID, LID) pairs go into ownedPidLidPairs.
860  // This is a map from the directory Map's LID to the (PID,
861  // LID) pair (where the latter LID comes from the input Map,
862  // not the directory Map). If the input Map is not
863  // one-to-one, corresponding LIDs will have
864  // ownedPidLidPairs[curLID].size() > 1. In that case, we
865  // will use the TieBreak object to pick exactly one pair.
866  Array<std::vector<std::pair<int, LO> > > ownedPidLidPairs (dir_numMyEntries);
867  size_type importIndex = 0;
868  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
869  const GO GID = importElements[importIndex++];
870  const int PID = importElements[importIndex++];
871  const LO LID = importElements[importIndex++];
872 
873  const LO dirMapLid = directoryMap_->getLocalElement (GID);
874  TEUCHOS_TEST_FOR_EXCEPTION(
875  dirMapLid == LINVALID, std::logic_error,
876  Teuchos::typeName(*this) << " constructor: Incoming global index "
877  << GID << " does not have a corresponding local index in the "
878  "Directory Map. Please report this bug to the Tpetra developers.");
879  ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
880  }
881 
882  // Use TieBreak to arbitrate between (PID,LID) pairs
883  // corresponding to each directory Map LID.
884  //
885  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
886  // as the directory Map LID?
887  // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
888  // contiguous. Loop over all ownedPidLidPairs; skip those that have
889  // empty lists. FIXED
890 
891  for (size_t i = 0; i < dir_numMyEntries; ++i) {
892  const std::vector<std::pair<int, LO> >& pidLidList =
893  ownedPidLidPairs[i];
894  const size_t listLen = pidLidList.size();
895  if (listLen == 0) continue; // KDD will happen for GIDs not in
896  // KDD the user's source map
897  const LO dirMapLid = static_cast<LO> (i);
898  const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
899  if (listLen > 1) {
900  locallyOneToOne_ = false;
901  }
902  // If there is some (PID,LID) pair for the current input
903  // Map LID, then it makes sense to invoke the TieBreak
904  // object to arbitrate between the options. Even if
905  // there is only one (PID,LID) pair, we still want to
906  // give the TieBreak object a chance to do whatever it
907  // likes to do, in terms of side effects (e.g., track
908  // (PID,LID) pairs).
909  const size_type index =
910  static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
911  pidLidList));
912  PIDs_[i] = pidLidList[index].first;
913  LIDs_[i] = pidLidList[index].second;
914  }
915  }
916  }
917  }
918 
919  template<class LO, class GO, class NT>
920  std::string
922  {
923  std::ostringstream os;
924  os << "DistributedNoncontiguousDirectory"
925  << "<" << Teuchos::TypeNameTraits<LO>::name ()
926  << ", " << Teuchos::TypeNameTraits<GO>::name ()
927  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
928  return os.str ();
929  }
930 
931  template<class LO, class GO, class NT>
934  getEntriesImpl (const map_type& map,
935  const Teuchos::ArrayView<const GO> &globalIDs,
936  const Teuchos::ArrayView<int> &nodeIDs,
937  const Teuchos::ArrayView<LO> &localIDs,
938  const bool computeLIDs) const
939  {
940  using Teuchos::Array;
941  using Teuchos::ArrayRCP;
942  using Teuchos::ArrayView;
943  using Teuchos::as;
944  using Teuchos::RCP;
945  using Teuchos::toString;
946  using ::Tpetra::Details::Behavior;
947  using std::cerr;
948  using std::endl;
949  using size_type = typename Array<GO>::size_type;
950  const char funcPrefix[] = "Tpetra::"
951  "DistributedNoncontiguousDirectory::getEntriesImpl: ";
952  const char errSuffix[] =
953  " Please report this bug to the Tpetra developers.";
954 
955  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
956  const bool verbose = Behavior::verbose ("Directory") ||
957  Behavior::verbose ("Tpetra::Directory");
958  std::unique_ptr<std::string> procPrefix;
959  if (verbose) {
960  std::ostringstream os;
961  os << "Proc ";
962  if (map.getComm ().is_null ()) {
963  os << "?";
964  }
965  else {
966  os << map.getComm ()->getRank ();
967  }
968  os << ": ";
969  procPrefix = std::unique_ptr<std::string> (new std::string (os.str ()));
970  os << funcPrefix << "{GIDs: " << toString (globalIDs)
971  << ", PIDs: " << toString (nodeIDs)
972  << ", LIDs: " << toString (localIDs)
973  << ", computeLIDs: " << (computeLIDs ? "true" : "false")
974  << "}" << endl;
975  cerr << os.str ();
976  }
977 
978  const size_t numEntries = globalIDs.size ();
979  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
981 
982  //
983  // Set up directory structure.
984  //
985 
986  // If we're computing LIDs, we also have to include them in each
987  // packet, along with the GID and process ID.
988  const int packetSize = computeLIDs ? 3 : 2;
989 
990  // For data distribution, we use: Surprise! A Distributor!
991  Distributor distor (comm);
992 
993  // Get directory locations for the requested list of entries.
994  Array<int> dirImages (numEntries);
995  if (verbose) {
996  std::ostringstream os;
997  os << *procPrefix << "Call directoryMap_->getRemoteIndexList"
998  << endl;
999  cerr << os.str ();
1000  }
1001  res = directoryMap_->getRemoteIndexList (globalIDs, dirImages ());
1002  if (verbose) {
1003  std::ostringstream os;
1004  os << *procPrefix << "directoryMap_->getRemoteIndexList "
1005  "PIDs result: " << toString (dirImages) << endl;
1006  cerr << os.str ();
1007  }
1008 
1009  // Check for unfound globalIDs and set corresponding nodeIDs to -1
1010  size_t numMissing = 0;
1011  if (res == IDNotPresent) {
1012  for (size_t i=0; i < numEntries; ++i) {
1013  if (dirImages[i] == -1) {
1014  nodeIDs[i] = -1;
1015  if (computeLIDs) {
1016  localIDs[i] = LINVALID;
1017  }
1018  numMissing++;
1019  }
1020  }
1021  }
1022 
1023  Array<GO> sendGIDs;
1024  Array<int> sendImages;
1025  if (verbose) {
1026  std::ostringstream os;
1027  os << *procPrefix << "Call Distributor::createFromRecvs"
1028  << endl;
1029  cerr << os.str ();
1030  }
1031  distor.createFromRecvs (globalIDs, dirImages (), sendGIDs, sendImages);
1032  if (verbose) {
1033  std::ostringstream os;
1034  os << *procPrefix << "Distributor::createFromRecvs result: "
1035  << "{sendGIDs: " << toString (sendGIDs)
1036  << ", sendPIDs: " << toString (sendImages)
1037  << "}" << endl;
1038  cerr << os.str ();
1039  }
1040  const size_type numSends = sendGIDs.size ();
1041 
1042  //
1043  // mfh 13 Nov 2012:
1044  //
1045  // The code below temporarily stores LO, GO, and int values in
1046  // an array of global_size_t. If one of the signed types (LO
1047  // and GO should both be signed) happened to be -1 (or some
1048  // negative number, but -1 is the one that came up today), then
1049  // conversion to global_size_t will result in a huge
1050  // global_size_t value, and thus conversion back may overflow.
1051  // (Teuchos::as doesn't know that we meant it to be an LO or GO
1052  // all along.)
1053  //
1054  // The overflow normally would not be a problem, since it would
1055  // just go back to -1 again. However, Teuchos::as does range
1056  // checking on conversions in a debug build, so it throws an
1057  // exception (std::range_error) in this case. Range checking is
1058  // generally useful in debug mode, so we don't want to disable
1059  // this behavior globally.
1060  //
1061  // We solve this problem by forgoing use of Teuchos::as for the
1062  // conversions below from LO, GO, or int to global_size_t, and
1063  // the later conversions back from global_size_t to LO, GO, or
1064  // int.
1065  //
1066  // I've recorded this discussion as Bug 5760.
1067  //
1068 
1069  // global_size_t >= GO
1070  // global_size_t >= size_t >= int
1071  // global_size_t >= size_t >= LO
1072  // Therefore, we can safely store all of these in a global_size_t
1073  Array<global_size_t> exports (packetSize * numSends);
1074  {
1075  // Packet format:
1076  // - If computing LIDs: (GID, PID, LID)
1077  // - Otherwise: (GID, PID)
1078  //
1079  // "PID" means "process ID" (a.k.a. "node ID," a.k.a. "rank").
1080 
1081  // Current position to which to write in exports array. If
1082  // sending pairs, we pack the (GID, PID) pair for gid =
1083  // sendGIDs[k] in exports[2*k], exports[2*k+1]. If sending
1084  // triples, we pack the (GID, PID, LID) pair for gid =
1085  // sendGIDs[k] in exports[3*k, 3*k+1, 3*k+2].
1086  size_type exportsIndex = 0;
1087 
1088  if (useHashTables_) {
1089  if (verbose) {
1090  std::ostringstream os;
1091  os << *procPrefix << "Pack exports (useHashTables_ true)"
1092  << endl;
1093  cerr << os.str ();
1094  }
1095  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1096  const GO curGID = sendGIDs[gidIndex];
1097  // Don't use as() here (see above note).
1098  exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1099  const LO curLID = directoryMap_->getLocalElement (curGID);
1100  TEUCHOS_TEST_FOR_EXCEPTION
1101  (curLID == LINVALID, std::logic_error, funcPrefix <<
1102  "Directory Map's global index " << curGID << " lacks "
1103  "a corresponding local index." << errSuffix);
1104  // Don't use as() here (see above note).
1105  exports[exportsIndex++] =
1106  static_cast<global_size_t> (lidToPidTable_->get (curLID));
1107  if (computeLIDs) {
1108  // Don't use as() here (see above note).
1109  exports[exportsIndex++] =
1110  static_cast<global_size_t> (lidToLidTable_->get (curLID));
1111  }
1112  }
1113  }
1114  else {
1115  if (verbose) {
1116  std::ostringstream os;
1117  os << *procPrefix << "Pack exports (useHashTables_ false)"
1118  << endl;
1119  cerr << os.str ();
1120  }
1121  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1122  const GO curGID = sendGIDs[gidIndex];
1123  // Don't use as() here (see above note).
1124  exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1125  const LO curLID = directoryMap_->getLocalElement (curGID);
1126  TEUCHOS_TEST_FOR_EXCEPTION
1127  (curLID == LINVALID, std::logic_error, funcPrefix <<
1128  "Directory Map's global index " << curGID << " lacks "
1129  "a corresponding local index." << errSuffix);
1130  // Don't use as() here (see above note).
1131  exports[exportsIndex++] =
1132  static_cast<global_size_t> (PIDs_[curLID]);
1133  if (computeLIDs) {
1134  // Don't use as() here (see above note).
1135  exports[exportsIndex++] =
1136  static_cast<global_size_t> (LIDs_[curLID]);
1137  }
1138  }
1139  }
1140 
1141  TEUCHOS_TEST_FOR_EXCEPTION
1142  (exportsIndex > exports.size (), std::logic_error,
1143  funcPrefix << "On Process " << comm->getRank () << ", "
1144  "exportsIndex = " << exportsIndex << " > exports.size() = "
1145  << exports.size () << "." << errSuffix);
1146  }
1147 
1148  TEUCHOS_TEST_FOR_EXCEPTION
1149  (numEntries < numMissing, std::logic_error, funcPrefix <<
1150  "On Process " << comm->getRank () << ", numEntries = " <<
1151  numEntries << " < numMissing = " << numMissing << "." <<
1152  errSuffix);
1153 
1154  //
1155  // mfh 13 Nov 2012: See note above on conversions between
1156  // global_size_t and LO, GO, or int.
1157  //
1158  const size_t numRecv = numEntries - numMissing;
1159 
1160  {
1161  const size_t importLen = packetSize * distor.getTotalReceiveLength ();
1162  const size_t requiredImportLen = numRecv * packetSize;
1163  const int myRank = comm->getRank ();
1164  TEUCHOS_TEST_FOR_EXCEPTION(
1165  importLen < requiredImportLen, std::logic_error,
1166  "Tpetra::Details::DistributedNoncontiguousDirectory::getEntriesImpl: "
1167  "On Process " << myRank << ": The 'imports' array must have length "
1168  "at least " << requiredImportLen << ", but its actual length is " <<
1169  importLen << ". numRecv: " << numRecv << ", packetSize: " <<
1170  packetSize << ", numEntries (# GIDs): " << numEntries <<
1171  ", numMissing: " << numMissing << ": distor.getTotalReceiveLength(): "
1172  << distor.getTotalReceiveLength () << ". " << std::endl <<
1173  "Distributor description: " << distor.description () << ". "
1174  << std::endl <<
1175  "Please report this bug to the Tpetra developers.");
1176  }
1177 
1178  Array<global_size_t> imports (packetSize * distor.getTotalReceiveLength ());
1179  // FIXME (mfh 20 Mar 2014) One could overlap the sort2() below
1180  // with communication, by splitting this call into doPosts and
1181  // doWaits. The code is still correct in this form, however.
1182  if (verbose) {
1183  std::ostringstream os;
1184  os << *procPrefix << "Call doPostsAndWaits: {packetSize: "
1185  << packetSize << ", exports: " << toString (exports) << "}"
1186  << endl;
1187  cerr << os.str ();
1188  }
1189  distor.doPostsAndWaits (exports ().getConst (), packetSize, imports ());
1190  if (verbose) {
1191  std::ostringstream os;
1192  os << *procPrefix << "doPostsAndWaits result: "
1193  << toString (imports) << endl;
1194  cerr << os.str ();
1195  }
1196 
1197  Array<GO> sortedIDs (globalIDs); // deep copy (for later sorting)
1198  Array<GO> offset (numEntries); // permutation array (sort2 output)
1199  for (GO ii = 0; ii < static_cast<GO> (numEntries); ++ii) {
1200  offset[ii] = ii;
1201  }
1202  sort2 (sortedIDs.begin(), sortedIDs.begin() + numEntries, offset.begin());
1203  if (verbose) {
1204  std::ostringstream os;
1205  os << *procPrefix << "sortedIDs: " << toString (sortedIDs)
1206  << ", offset: " << toString (offset) << endl;
1207  cerr << os.str ();
1208  }
1209 
1210  size_t importsIndex = 0;
1211 
1212  // we know these conversions are in range, because we loaded this data
1213  //
1214  // Don't use as() for conversions here; we know they are in range.
1215  for (size_t i = 0; i < numRecv; ++i) {
1216  const GO curGID = static_cast<GO> (imports[importsIndex++]);
1217  auto p1 = std::equal_range (sortedIDs.begin(),
1218  sortedIDs.end(), curGID);
1219  if (p1.first != p1.second) {
1220  const size_t j = p1.first - sortedIDs.begin();
1221  nodeIDs[offset[j]] =
1222  static_cast<int> (imports[importsIndex++]);
1223  if (computeLIDs) {
1224  localIDs[offset[j]] =
1225  static_cast<LO> (imports[importsIndex++]);
1226  }
1227  if (nodeIDs[offset[j]] == -1) {
1228  res = IDNotPresent;
1229  }
1230  }
1231  }
1232 
1233  TEUCHOS_TEST_FOR_EXCEPTION
1234  (size_t (importsIndex) > size_t (imports.size ()),
1235  std::logic_error, funcPrefix << "On Process " <<
1236  comm->getRank () << ": importsIndex = " << importsIndex
1237  << " > imports.size() = " << imports.size () << ". "
1238  "numRecv: " << numRecv
1239  << ", packetSize: " << packetSize << ", "
1240  "numEntries (# GIDs): " << numEntries
1241  << ", numMissing: " << numMissing
1242  << ": distor.getTotalReceiveLength(): "
1243  << distor.getTotalReceiveLength () << "." << errSuffix);
1244  if (verbose) {
1245  std::ostringstream os;
1246  os << *procPrefix << funcPrefix << "Done!" << endl;
1247  cerr << os.str ();
1248  }
1249  return res;
1250  }
1251 
1252 
1253  template<class LO, class GO, class NT>
1254  bool
1256  isOneToOne (const Teuchos::Comm<int>& comm) const
1257  {
1258  if (oneToOneResult_ == ONE_TO_ONE_NOT_CALLED_YET) {
1259  const int lcl121 = isLocallyOneToOne () ? 1 : 0;
1260  int gbl121 = 0;
1261  Teuchos::reduceAll<int, int> (comm, Teuchos::REDUCE_MIN, lcl121,
1262  Teuchos::outArg (gbl121));
1263  oneToOneResult_ = (gbl121 == 1) ? ONE_TO_ONE_TRUE : ONE_TO_ONE_FALSE;
1264  }
1265  return (oneToOneResult_ == ONE_TO_ONE_TRUE);
1266  }
1267  } // namespace Details
1268 } // namespace Tpetra
1269 
1270 //
1271 // Explicit instantiation macro
1272 //
1273 // Must be expanded from within the Tpetra namespace!
1274 //
1275 #define TPETRA_DIRECTORYIMPL_INSTANT(LO,GO,NODE) \
1276  namespace Details { \
1277  template class Directory< LO , GO , NODE >; \
1278  template class ReplicatedDirectory< LO , GO , NODE >; \
1279  template class ContiguousUniformDirectory< LO, GO, NODE >; \
1280  template class DistributedContiguousDirectory< LO , GO , NODE >; \
1281  template class DistributedNoncontiguousDirectory< LO , GO , NODE >; \
1282  }
1283 
1284 #endif // __Tpetra_DirectoryImpl_def_hpp
Tpetra::IDNotPresent
@ IDNotPresent
Definition: Tpetra_ConfigDefs.hpp:126
Tpetra::Details::DistributedContiguousDirectory::description
std::string description() const
A one-line human-readable description of this object.
Definition: Tpetra_DirectoryImpl_def.hpp:363
Tpetra::Distributor::description
std::string description() const
Return a one-line description of this object.
Definition: Tpetra_Distributor.cpp:592
Tpetra::Details::DistributedNoncontiguousDirectory::description
std::string description() const
A one-line human-readable description of this object.
Definition: Tpetra_DirectoryImpl_def.hpp:921
Tpetra::sort2
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
Definition: Tpetra_Util.hpp:532
Tpetra::Map::isNodeGlobalElement
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
Definition: Tpetra_Map_def.hpp:1278
Tpetra::Details::DistributedNoncontiguousDirectory::getEntriesImpl
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
Definition: Tpetra_DirectoryImpl_def.hpp:934
Tpetra::Details::ReplicatedDirectory::description
std::string description() const
A one-line human-readable description of this object.
Definition: Tpetra_DirectoryImpl_def.hpp:123
Tpetra::Map
A parallel distribution of indices over processes.
Definition: Tpetra_Map_decl.hpp:246
Tpetra::Details::DistributedContiguousDirectory
Implementation of Directory for a distributed contiguous Map.
Definition: Tpetra_DirectoryImpl_decl.hpp:271
Tpetra::Details::ContiguousUniformDirectory
Implementation of Directory for a contiguous, uniformly distributed Map.
Definition: Tpetra_DirectoryImpl_decl.hpp:222
Tpetra::Distributor::doPostsAndWaits
void doPostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the (forward) communication plan.
Definition: Tpetra_Distributor.hpp:1037
Details
Implementation details of Tpetra.
Tpetra::Map::getMinAllGlobalIndex
GlobalOrdinal getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
Definition: Tpetra_Map_decl.hpp:681
Tpetra::Details::ReplicatedDirectory::ReplicatedDirectory
ReplicatedDirectory()=default
Constructor (that takes no arguments).
Tpetra::AllIDsPresent
@ AllIDsPresent
Definition: Tpetra_ConfigDefs.hpp:125
Tpetra::Details::DistributedNoncontiguousDirectory
Implementation of Directory for a distributed noncontiguous Map.
Definition: Tpetra_DirectoryImpl_decl.hpp:351
Tpetra::Details::ReplicatedDirectory::isOneToOne
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory's input Map is (globally) one to one.
Definition: Tpetra_DirectoryImpl_def.hpp:112
Tpetra::Details::DistributedNoncontiguousDirectory::isOneToOne
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory's input Map is (globally) one to one.
Definition: Tpetra_DirectoryImpl_def.hpp:1256
Tpetra_TieBreak.hpp
Interface for breaking ties in ownership.
Tpetra::initialize
void initialize(int *argc, char ***argv)
Initialize Tpetra.
Definition: Tpetra_Core.cpp:230
Tpetra::Map::getComm
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
Definition: Tpetra_Map_def.hpp:2102
Tpetra::Details::DistributedContiguousDirectory::getEntriesImpl
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
Definition: Tpetra_DirectoryImpl_def.hpp:419
Tpetra::Details::ContiguousUniformDirectory::description
std::string description() const
A one-line human-readable description of this object.
Definition: Tpetra_DirectoryImpl_def.hpp:147
Tpetra::Distributor
Sets up and executes a communication plan for a Tpetra DistObject.
Definition: Tpetra_Distributor.hpp:192
Tpetra::Details::Behavior::verbose
static bool verbose()
Whether Tpetra is in verbose mode.
Definition: Tpetra_Details_Behavior.cpp:205
Tpetra::Map::getGlobalNumElements
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Definition: Tpetra_Map_decl.hpp:609
Tpetra::LookupStatus
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Definition: Tpetra_ConfigDefs.hpp:124
Tpetra::Map::getLocalElement
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
Definition: Tpetra_Map_def.hpp:1223
Tpetra::Details::ContiguousUniformDirectory::getEntriesImpl
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
Definition: Tpetra_DirectoryImpl_def.hpp:161
Tpetra::Details::Directory::getEntries
LookupStatus getEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Definition: Tpetra_DirectoryImpl_def.hpp:67
Tpetra::global_size_t
size_t global_size_t
Global size_t object.
Definition: Tpetra_ConfigDefs.hpp:109
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Tpetra::Distributor::createFromRecvs
void createFromRecvs(const Teuchos::ArrayView< const Ordinal > &remoteIDs, const Teuchos::ArrayView< const int > &remoteProcIDs, Teuchos::Array< Ordinal > &exportIDs, Teuchos::Array< int > &exportProcIDs)
Set up Distributor using list of process ranks from which to receive.
Tpetra::Distributor::getTotalReceiveLength
size_t getTotalReceiveLength() const
Total number of values this process will receive from other processes.
Definition: Tpetra_Distributor.cpp:404
Tpetra::Details::ReplicatedDirectory::getEntriesImpl
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
Definition: Tpetra_DirectoryImpl_def.hpp:376