pacemaker  1.1.19-c3c624ea3d
Scalable High-Availability cluster resource manager
unpack.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 #include <crm_internal.h>
19 
20 #include <glib.h>
21 
22 #include <crm/crm.h>
23 #include <crm/services.h>
24 #include <crm/msg_xml.h>
25 #include <crm/common/xml.h>
26 
27 #include <crm/common/util.h>
28 #include <crm/pengine/rules.h>
29 #include <crm/pengine/internal.h>
30 #include <unpack.h>
31 
32 CRM_TRACE_INIT_DATA(pe_status);
33 
/* Set or clear a working-set flag from a boolean cluster option.
 *
 * Looks up `option` with pe_pref() in (data_set)->config_hash.  When a value
 * is found, `flag` is set in (data_set)->flags if crm_is_true() accepts the
 * value and cleared otherwise.  When no value is found, the flag keeps its
 * current state.
 *
 * Every argument is parenthesized so arbitrary expressions can be passed.
 * data_set is evaluated more than once, so it must be free of side effects.
 */
#define set_config_flag(data_set, option, flag) do {                          \
        const char *scf_value = pe_pref((data_set)->config_hash, (option));   \
        if (scf_value != NULL) {                                              \
            if (crm_is_true(scf_value)) {                                     \
                set_bit((data_set)->flags, (flag));                           \
            } else {                                                          \
                clear_bit((data_set)->flags, (flag));                         \
            }                                                                 \
        }                                                                     \
    } while(0)
44 
45 gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure,
46  enum action_fail_response *failed, pe_working_set_t * data_set);
47 static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node);
48 
49 // Bitmask for warnings we only want to print once
51 
52 static gboolean
53 is_dangling_container_remote_node(node_t *node)
54 {
55  /* we are looking for a remote-node that was supposed to be mapped to a
56  * container resource, but all traces of that container have disappeared
57  * from both the config and the status section. */
58  if (is_remote_node(node) &&
59  node->details->remote_rsc &&
60  node->details->remote_rsc->container == NULL &&
62  return TRUE;
63  }
64 
65  return FALSE;
66 }
67 
68 
76 void
77 pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
78 {
79  CRM_CHECK(node, return);
80 
81  /* A guest node is fenced by marking its container as failed */
82  if (is_container_remote_node(node)) {
83  resource_t *rsc = node->details->remote_rsc->container;
84 
85  if (is_set(rsc->flags, pe_rsc_failed) == FALSE) {
86  if (!is_set(rsc->flags, pe_rsc_managed)) {
87  crm_notice("Not fencing guest node %s "
88  "(otherwise would because %s): "
89  "its guest resource %s is unmanaged",
90  node->details->uname, reason, rsc->id);
91  } else {
92  crm_warn("Guest node %s will be fenced "
93  "(by recovering its guest resource %s): %s",
94  node->details->uname, rsc->id, reason);
95 
96  /* We don't mark the node as unclean because that would prevent the
97  * node from running resources. We want to allow it to run resources
98  * in this transition if the recovery succeeds.
99  */
100  node->details->remote_requires_reset = TRUE;
101  set_bit(rsc->flags, pe_rsc_failed);
102  }
103  }
104 
105  } else if (is_dangling_container_remote_node(node)) {
106  crm_info("Cleaning up dangling connection for guest node %s: "
107  "fencing was already done because %s, "
108  "and guest resource no longer exists",
109  node->details->uname, reason);
111 
112  } else if (is_baremetal_remote_node(node)) {
113  resource_t *rsc = node->details->remote_rsc;
114 
115  if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) {
116  crm_notice("Not fencing remote node %s "
117  "(otherwise would because %s): connection is unmanaged",
118  node->details->uname, reason);
119  } else if(node->details->remote_requires_reset == FALSE) {
120  node->details->remote_requires_reset = TRUE;
121  crm_warn("Remote node %s %s: %s",
122  node->details->uname,
123  pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
124  reason);
125  }
126  node->details->unclean = TRUE;
127  pe_fence_op(node, NULL, TRUE, reason, data_set);
128 
129  } else if (node->details->unclean) {
130  crm_trace("Cluster node %s %s because %s",
131  node->details->uname,
132  pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean",
133  reason);
134 
135  } else {
136  crm_warn("Cluster node %s %s: %s",
137  node->details->uname,
138  pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
139  reason);
140  node->details->unclean = TRUE;
141  pe_fence_op(node, NULL, TRUE, reason, data_set);
142  }
143 }
144 
145 // @TODO xpaths can't handle templates, rules, or id-refs
146 
/* XPath fragment matching an nvpair whose name is the "provides" or
 * "requires" meta-attribute and whose value is 'unfencing'
 */
#define XPATH_UNFENCING_NVPAIR                                      \
    XML_CIB_TAG_NVPAIR                                              \
    "[(@" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_PROVIDES "'"       \
    "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') "    \
    "and @" XML_NVPAIR_ATTR_VALUE "='unfencing']"

/* XPath union matching such an nvpair either in a meta-attribute set anywhere
 * below the resources section, or in a meta-attribute set directly below the
 * resource defaults section
 */
#define XPATH_ENABLE_UNFENCING                                      \
    "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION                   \
    "/" XML_CIB_TAG_RESOURCES                                       \
    "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR               \
    "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION                  \
    "/" XML_CIB_TAG_RSCCONFIG                                       \
    "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR
159 
160 static
161 void set_if_xpath(unsigned long long flag, const char *xpath,
162  pe_working_set_t *data_set)
163 {
164  xmlXPathObjectPtr result = NULL;
165 
166  if (is_not_set(data_set->flags, flag)) {
167  result = xpath_search(data_set->input, xpath);
168  if (result && (numXpathResults(result) > 0)) {
169  set_bit(data_set->flags, flag);
170  }
171  freeXpathObject(result);
172  }
173 }
174 
175 gboolean
176 unpack_config(xmlNode * config, pe_working_set_t * data_set)
177 {
178  const char *value = NULL;
179  GHashTable *config_hash = crm_str_table_new();
180 
181  data_set->config_hash = config_hash;
182 
183  unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash,
184  CIB_OPTIONS_FIRST, FALSE, data_set->now);
185 
186  verify_pe_options(data_set->config_hash);
187 
188  set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes);
189  if(is_not_set(data_set->flags, pe_flag_startup_probes)) {
190  crm_info("Startup probes: disabled (dangerous)");
191  }
192 
193  value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG);
194  if (value && crm_is_true(value)) {
195  crm_notice("Watchdog will be used via SBD if fencing is required");
197  }
198 
199  /* Set certain flags via xpath here, so they can be used before the relevant
200  * configuration sections are unpacked.
201  */
202  set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set);
203 
204  value = pe_pref(data_set->config_hash, "stonith-timeout");
205  data_set->stonith_timeout = crm_get_msec(value);
206  crm_debug("STONITH timeout: %d", data_set->stonith_timeout);
207 
208  set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled);
209  crm_debug("STONITH of failed nodes is %s",
210  is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled");
211 
212  data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action");
213  if (!strcmp(data_set->stonith_action, "poweroff")) {
215  "Support for stonith-action of 'poweroff' is deprecated "
216  "and will be removed in a future release (use 'off' instead)");
217  data_set->stonith_action = "off";
218  }
219  crm_trace("STONITH will %s nodes", data_set->stonith_action);
220 
221  set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing);
222  crm_debug("Concurrent fencing is %s",
223  is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled");
224 
225  set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything);
226  crm_debug("Stop all active resources: %s",
227  is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false");
228 
229  set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster);
230  if (is_set(data_set->flags, pe_flag_symmetric_cluster)) {
231  crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
232  }
233 
234  value = pe_pref(data_set->config_hash, "default-resource-stickiness");
235  if (value) {
237  "Support for 'default-resource-stickiness' cluster property"
238  " is deprecated and will be removed in a future release"
239  " (use resource-stickiness in rsc_defaults instead)");
240  }
241  data_set->default_resource_stickiness = char2score(value);
242  crm_debug("Default stickiness: %d", data_set->default_resource_stickiness);
243 
244  value = pe_pref(data_set->config_hash, "no-quorum-policy");
245 
246  if (safe_str_eq(value, "ignore")) {
248 
249  } else if (safe_str_eq(value, "freeze")) {
251 
252  } else if (safe_str_eq(value, "suicide")) {
253  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
254  int do_panic = 0;
255 
257  &do_panic);
258  if (do_panic || is_set(data_set->flags, pe_flag_have_quorum)) {
260  } else {
261  crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum");
262  data_set->no_quorum_policy = no_quorum_stop;
263  }
264  } else {
265  crm_config_err("Resetting no-quorum-policy to 'stop': stonith is not configured");
266  data_set->no_quorum_policy = no_quorum_stop;
267  }
268 
269  } else {
270  data_set->no_quorum_policy = no_quorum_stop;
271  }
272 
273  switch (data_set->no_quorum_policy) {
274  case no_quorum_freeze:
275  crm_debug("On loss of CCM Quorum: Freeze resources");
276  break;
277  case no_quorum_stop:
278  crm_debug("On loss of CCM Quorum: Stop ALL resources");
279  break;
280  case no_quorum_suicide:
281  crm_notice("On loss of CCM Quorum: Fence all remaining nodes");
282  break;
283  case no_quorum_ignore:
284  crm_notice("On loss of CCM Quorum: Ignore");
285  break;
286  }
287 
288  set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans);
289  crm_trace("Orphan resources are %s",
290  is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored");
291 
292  set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans);
293  crm_trace("Orphan resource actions are %s",
294  is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored");
295 
296  set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop);
297  crm_trace("Stopped resources are removed from the status section: %s",
298  is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false");
299 
300  set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
301  crm_trace("Maintenance mode: %s",
302  is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false");
303 
304  if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
306  } else if (pe_pref(data_set->config_hash, "is-managed-default")) {
307  set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default);
309  "Support for 'is-managed-default' cluster property"
310  " is deprecated and will be removed in a future release"
311  " (use is-managed in rsc_defaults instead)");
312  }
313  crm_trace("By default resources are %smanaged",
314  is_set(data_set->flags, pe_flag_is_managed_default) ? "" : "not ");
315 
316  set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal);
317  crm_trace("Start failures are %s",
318  is_set(data_set->flags,
319  pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount");
320 
321  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
322  set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing);
323  }
324  if (is_set(data_set->flags, pe_flag_startup_fencing)) {
325  crm_trace("Unseen nodes will be fenced");
326  } else {
327  pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes");
328  }
329 
330  node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red"));
331  node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green"));
332  node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow"));
333 
334  crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s",
335  pe_pref(data_set->config_hash, "node-health-red"),
336  pe_pref(data_set->config_hash, "node-health-yellow"),
337  pe_pref(data_set->config_hash, "node-health-green"));
338 
339  data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
340  crm_trace("Placement strategy: %s", data_set->placement_strategy);
341 
342  return TRUE;
343 }
344 
345 static void
346 destroy_digest_cache(gpointer ptr)
347 {
348  op_digest_cache_t *data = ptr;
349 
350  free_xml(data->params_all);
351  free_xml(data->params_secure);
352  free_xml(data->params_restart);
353 
354  free(data->digest_all_calc);
355  free(data->digest_restart_calc);
356  free(data->digest_secure_calc);
357 
358  free(data);
359 }
360 
361 node_t *
362 pe_create_node(const char *id, const char *uname, const char *type,
363  const char *score, pe_working_set_t * data_set)
364 {
365  node_t *new_node = NULL;
366 
367  if (pe_find_node(data_set->nodes, uname) != NULL) {
368  crm_config_warn("Detected multiple node entries with uname=%s"
369  " - this is rarely intended", uname);
370  }
371 
372  new_node = calloc(1, sizeof(node_t));
373  if (new_node == NULL) {
374  return NULL;
375  }
376 
377  new_node->weight = char2score(score);
378  new_node->fixed = FALSE;
379  new_node->details = calloc(1, sizeof(struct node_shared_s));
380 
381  if (new_node->details == NULL) {
382  free(new_node);
383  return NULL;
384  }
385 
386  crm_trace("Creating node for entry %s/%s", uname, id);
387  new_node->details->id = id;
388  new_node->details->uname = uname;
389  new_node->details->online = FALSE;
390  new_node->details->shutdown = FALSE;
391  new_node->details->rsc_discovery_enabled = TRUE;
392  new_node->details->running_rsc = NULL;
393  new_node->details->type = node_ping;
394 
395  if (safe_str_eq(type, "remote")) {
396  new_node->details->type = node_remote;
398  } else if (type == NULL || safe_str_eq(type, "member")
399  || safe_str_eq(type, NORMALNODE)) {
400  new_node->details->type = node_member;
401  }
402 
403  new_node->details->attrs = crm_str_table_new();
404 
405  if (is_remote_node(new_node)) {
406  g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
407  strdup("remote"));
408  } else {
409  g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
410  strdup("cluster"));
411  }
412 
413  new_node->details->utilization = crm_str_table_new();
414 
415  new_node->details->digest_cache =
416  g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str,
417  destroy_digest_cache);
418 
419  data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname);
420  return new_node;
421 }
422 
423 bool
424 remote_id_conflict(const char *remote_name, pe_working_set_t *data)
425 {
426  bool match = FALSE;
427 #if 1
428  pe_find_resource(data->resources, remote_name);
429 #else
430  if (data->name_check == NULL) {
431  data->name_check = g_hash_table_new(crm_str_hash, g_str_equal);
432  for (xml_rsc = __xml_first_child(parent); xml_rsc != NULL; xml_rsc = __xml_next_element(xml_rsc)) {
433  const char *id = ID(xml_rsc);
434 
435  /* avoiding heap allocation here because we know the duration of this hashtable allows us to */
436  g_hash_table_insert(data->name_check, (char *) id, (char *) id);
437  }
438  }
439  if (g_hash_table_lookup(data->name_check, remote_name)) {
440  match = TRUE;
441  }
442 #endif
443  if (match) {
444  crm_err("Invalid remote-node name, a resource called '%s' already exists.", remote_name);
445  return NULL;
446  }
447 
448  return match;
449 }
450 
451 
452 static const char *
453 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data)
454 {
455  xmlNode *attr_set = NULL;
456  xmlNode *attr = NULL;
457 
458  const char *container_id = ID(xml_obj);
459  const char *remote_name = NULL;
460  const char *remote_server = NULL;
461  const char *remote_port = NULL;
462  const char *connect_timeout = "60s";
463  const char *remote_allow_migrate=NULL;
464  const char *container_managed = NULL;
465 
466  for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next_element(attr_set)) {
467  if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) {
468  continue;
469  }
470 
471  for (attr = __xml_first_child(attr_set); attr != NULL; attr = __xml_next_element(attr)) {
472  const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
473  const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
474 
476  remote_name = value;
477  } else if (safe_str_eq(name, "remote-addr")) {
478  remote_server = value;
479  } else if (safe_str_eq(name, "remote-port")) {
480  remote_port = value;
481  } else if (safe_str_eq(name, "remote-connect-timeout")) {
482  connect_timeout = value;
483  } else if (safe_str_eq(name, "remote-allow-migrate")) {
484  remote_allow_migrate=value;
485  } else if (safe_str_eq(name, XML_RSC_ATTR_MANAGED)) {
486  container_managed = value;
487  }
488  }
489  }
490 
491  if (remote_name == NULL) {
492  return NULL;
493  }
494 
495  if (remote_id_conflict(remote_name, data)) {
496  return NULL;
497  }
498 
499  pe_create_remote_xml(parent, remote_name, container_id,
500  remote_allow_migrate, container_managed, "30s", "30s",
501  connect_timeout, remote_server, remote_port);
502  return remote_name;
503 }
504 
505 static void
506 handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node)
507 {
508  if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) {
509  /* Ignore fencing for remote nodes that don't have a connection resource
510  * associated with them. This happens when remote node entries get left
511  * in the nodes section after the connection resource is removed.
512  */
513  return;
514  }
515 
516  if (is_set(data_set->flags, pe_flag_startup_fencing)) {
517  // All nodes are unclean until we've seen their status entry
518  new_node->details->unclean = TRUE;
519 
520  } else {
521  // Blind faith ...
522  new_node->details->unclean = FALSE;
523  }
524 
525  /* We need to be able to determine if a node's status section
526  * exists or not separate from whether the node is unclean. */
527  new_node->details->unseen = TRUE;
528 }
529 
530 gboolean
531 unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set)
532 {
533  xmlNode *xml_obj = NULL;
534  node_t *new_node = NULL;
535  const char *id = NULL;
536  const char *uname = NULL;
537  const char *type = NULL;
538  const char *score = NULL;
539 
540  for (xml_obj = __xml_first_child(xml_nodes); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
541  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) {
542  new_node = NULL;
543 
544  id = crm_element_value(xml_obj, XML_ATTR_ID);
545  uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
546  type = crm_element_value(xml_obj, XML_ATTR_TYPE);
547  score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
548  crm_trace("Processing node %s/%s", uname, id);
549 
550  if (id == NULL) {
551  crm_config_err("Must specify id tag in <node>");
552  continue;
553  }
554  new_node = pe_create_node(id, uname, type, score, data_set);
555 
556  if (new_node == NULL) {
557  return FALSE;
558  }
559 
560 /* if(data_set->have_quorum == FALSE */
561 /* && data_set->no_quorum_policy == no_quorum_stop) { */
562 /* /\* start shutting resources down *\/ */
563 /* new_node->weight = -INFINITY; */
564 /* } */
565 
566  handle_startup_fencing(data_set, new_node);
567 
568  add_node_attrs(xml_obj, new_node, FALSE, data_set);
569  unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL,
570  new_node->details->utilization, NULL, FALSE, data_set->now);
571 
572  crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
573  }
574  }
575 
576  if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) {
577  crm_info("Creating a fake local node");
578  pe_create_node(data_set->localhost, data_set->localhost, NULL, 0,
579  data_set);
580  }
581 
582  return TRUE;
583 }
584 
585 static void
586 setup_container(resource_t * rsc, pe_working_set_t * data_set)
587 {
588  const char *container_id = NULL;
589 
590  if (rsc->children) {
591  GListPtr gIter = rsc->children;
592 
593  for (; gIter != NULL; gIter = gIter->next) {
594  resource_t *child_rsc = (resource_t *) gIter->data;
595 
596  setup_container(child_rsc, data_set);
597  }
598  return;
599  }
600 
601  container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
602  if (container_id && safe_str_neq(container_id, rsc->id)) {
603  resource_t *container = pe_find_resource(data_set->resources, container_id);
604 
605  if (container) {
606  rsc->container = container;
607  set_bit(container->flags, pe_rsc_is_container);
608  container->fillers = g_list_append(container->fillers, rsc);
609  pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
610  } else {
611  pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
612  }
613  }
614 }
615 
616 gboolean
617 unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set)
618 {
619  xmlNode *xml_obj = NULL;
620 
621  /* generate remote nodes from resource config before unpacking resources */
622  for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
623  const char *new_node_id = NULL;
624 
625  /* first check if this is a bare metal remote node. Bare metal remote nodes
626  * are defined as a resource primitive only. */
627  if (xml_contains_remote_node(xml_obj)) {
628  new_node_id = ID(xml_obj);
629  /* The "pe_find_node" check is here to make sure we don't iterate over
630  * an expanded node that has already been added to the node list. */
631  if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
632  crm_trace("Found baremetal remote node %s in container resource %s", new_node_id, ID(xml_obj));
633  pe_create_node(new_node_id, new_node_id, "remote", NULL,
634  data_set);
635  }
636  continue;
637  }
638 
639  /* Now check for guest remote nodes.
640  * guest remote nodes are defined within a resource primitive.
641  * Example1: a vm resource might be configured as a remote node.
642  * Example2: a vm resource might be configured within a group to be a remote node.
643  * Note: right now we only support guest remote nodes in as a standalone primitive
644  * or a primitive within a group. No cloned primitives can be a guest remote node
645  * right now */
646  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, TRUE)) {
647  /* expands a metadata defined remote resource into the xml config
648  * as an actual rsc primitive to be unpacked later. */
649  new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set);
650 
651  if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
652  crm_trace("Found guest remote node %s in container resource %s", new_node_id, ID(xml_obj));
653  pe_create_node(new_node_id, new_node_id, "remote", NULL,
654  data_set);
655  }
656  continue;
657 
658  } else if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, TRUE)) {
659  xmlNode *xml_obj2 = NULL;
660  /* search through a group to see if any of the primitive contain a remote node. */
661  for (xml_obj2 = __xml_first_child(xml_obj); xml_obj2 != NULL; xml_obj2 = __xml_next_element(xml_obj2)) {
662 
663  new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set);
664 
665  if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
666  crm_trace("Found guest remote node %s in container resource %s which is in group %s", new_node_id, ID(xml_obj2), ID(xml_obj));
667  pe_create_node(new_node_id, new_node_id, "remote", NULL,
668  data_set);
669  }
670  }
671  }
672  }
673  return TRUE;
674 }
675 
676 
677 /* Call this after all the nodes and resources have been
678  * unpacked, but before the status section is read.
679  *
680  * A remote node's online status is reflected by the state
681  * of the remote node's connection resource. We need to link
682  * the remote node to this connection resource so we can have
683  * easy access to the connection resource during the PE calculations.
684  */
685 static void
686 link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc)
687 {
688  node_t *remote_node = NULL;
689 
690  if (new_rsc->is_remote_node == FALSE) {
691  return;
692  }
693 
694  if (is_set(data_set->flags, pe_flag_quick_location)) {
695  /* remote_nodes and remote_resources are not linked in quick location calculations */
696  return;
697  }
698 
699  print_resource(LOG_DEBUG_3, "Linking remote-node connection resource, ", new_rsc, FALSE);
700 
701  remote_node = pe_find_node(data_set->nodes, new_rsc->id);
702  CRM_CHECK(remote_node != NULL, return;);
703 
704  remote_node->details->remote_rsc = new_rsc;
705  /* If this is a baremetal remote-node (no container resource
706  * associated with it) then we need to handle startup fencing the same way
707  * as cluster nodes. */
708  if (new_rsc->container == NULL) {
709  handle_startup_fencing(data_set, remote_node);
710  } else {
711  /* At this point we know if the remote node is a container or baremetal
712  * remote node, update the #kind attribute if a container is involved */
713  g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND),
714  strdup("container"));
715  }
716 }
717 
718 static void
719 destroy_tag(gpointer data)
720 {
721  tag_t *tag = data;
722 
723  if (tag) {
724  free(tag->id);
725  g_list_free_full(tag->refs, free);
726  free(tag);
727  }
728 }
729 
742 gboolean
743 unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set)
744 {
745  xmlNode *xml_obj = NULL;
746  GListPtr gIter = NULL;
747 
748  data_set->template_rsc_sets =
749  g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str,
750  destroy_tag);
751 
752  for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
753  resource_t *new_rsc = NULL;
754 
755  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) {
756  const char *template_id = ID(xml_obj);
757 
758  if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets,
759  template_id, NULL, NULL) == FALSE) {
760  /* Record the template's ID for the knowledge of its existence anyway. */
761  g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL);
762  }
763  continue;
764  }
765 
766  crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj));
767  if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) {
768  data_set->resources = g_list_append(data_set->resources, new_rsc);
769  print_resource(LOG_DEBUG_3, "Added ", new_rsc, FALSE);
770 
771  } else {
772  crm_config_err("Failed unpacking %s %s",
773  crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID));
774  if (new_rsc != NULL && new_rsc->fns != NULL) {
775  new_rsc->fns->free(new_rsc);
776  }
777  }
778  }
779 
780  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
781  resource_t *rsc = (resource_t *) gIter->data;
782 
783  setup_container(rsc, data_set);
784  link_rsc2remotenode(data_set, rsc);
785  }
786 
787  data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority);
788  if (is_set(data_set->flags, pe_flag_quick_location)) {
789  /* Ignore */
790 
791  } else if (is_set(data_set->flags, pe_flag_stonith_enabled)
792  && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {
793 
794  crm_config_err("Resource start-up disabled since no STONITH resources have been defined");
795  crm_config_err("Either configure some or disable STONITH with the stonith-enabled option");
796  crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
797  }
798 
799  return TRUE;
800 }
801 
802 gboolean
803 unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set)
804 {
805  xmlNode *xml_tag = NULL;
806 
807  data_set->tags =
808  g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_tag);
809 
810  for (xml_tag = __xml_first_child(xml_tags); xml_tag != NULL; xml_tag = __xml_next_element(xml_tag)) {
811  xmlNode *xml_obj_ref = NULL;
812  const char *tag_id = ID(xml_tag);
813 
814  if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) {
815  continue;
816  }
817 
818  if (tag_id == NULL) {
819  crm_config_err("Failed unpacking %s: %s should be specified",
820  crm_element_name(xml_tag), XML_ATTR_ID);
821  continue;
822  }
823 
824  for (xml_obj_ref = __xml_first_child(xml_tag); xml_obj_ref != NULL; xml_obj_ref = __xml_next_element(xml_obj_ref)) {
825  const char *obj_ref = ID(xml_obj_ref);
826 
827  if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) {
828  continue;
829  }
830 
831  if (obj_ref == NULL) {
832  crm_config_err("Failed unpacking %s for tag %s: %s should be specified",
833  crm_element_name(xml_obj_ref), tag_id, XML_ATTR_ID);
834  continue;
835  }
836 
837  if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) {
838  return FALSE;
839  }
840  }
841  }
842 
843  return TRUE;
844 }
845 
846 /* The ticket state section:
847  * "/cib/status/tickets/ticket_state" */
848 static gboolean
849 unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set)
850 {
851  const char *ticket_id = NULL;
852  const char *granted = NULL;
853  const char *last_granted = NULL;
854  const char *standby = NULL;
855  xmlAttrPtr xIter = NULL;
856 
857  ticket_t *ticket = NULL;
858 
859  ticket_id = ID(xml_ticket);
860  if (ticket_id == NULL || strlen(ticket_id) == 0) {
861  return FALSE;
862  }
863 
864  crm_trace("Processing ticket state for %s", ticket_id);
865 
866  ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
867  if (ticket == NULL) {
868  ticket = ticket_new(ticket_id, data_set);
869  if (ticket == NULL) {
870  return FALSE;
871  }
872  }
873 
874  for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
875  const char *prop_name = (const char *)xIter->name;
876  const char *prop_value = crm_element_value(xml_ticket, prop_name);
877 
878  if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) {
879  continue;
880  }
881  g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
882  }
883 
884  granted = g_hash_table_lookup(ticket->state, "granted");
885  if (granted && crm_is_true(granted)) {
886  ticket->granted = TRUE;
887  crm_info("We have ticket '%s'", ticket->id);
888  } else {
889  ticket->granted = FALSE;
890  crm_info("We do not have ticket '%s'", ticket->id);
891  }
892 
893  last_granted = g_hash_table_lookup(ticket->state, "last-granted");
894  if (last_granted) {
895  ticket->last_granted = crm_parse_int(last_granted, 0);
896  }
897 
898  standby = g_hash_table_lookup(ticket->state, "standby");
899  if (standby && crm_is_true(standby)) {
900  ticket->standby = TRUE;
901  if (ticket->granted) {
902  crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
903  }
904  } else {
905  ticket->standby = FALSE;
906  }
907 
908  crm_trace("Done with ticket state for %s", ticket_id);
909 
910  return TRUE;
911 }
912 
913 static gboolean
914 unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set)
915 {
916  xmlNode *xml_obj = NULL;
917 
918  for (xml_obj = __xml_first_child(xml_tickets); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
919  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) {
920  continue;
921  }
922  unpack_ticket_state(xml_obj, data_set);
923  }
924 
925  return TRUE;
926 }
927 
928 /* @COMPAT DC < 1.1.7: Compatibility with the deprecated ticket state section:
929  * "/cib/status/tickets/instance_attributes" */
930 static void
931 get_ticket_state_legacy(gpointer key, gpointer value, gpointer user_data)
932 {
933  const char *long_key = key;
934  char *state_key = NULL;
935 
936  const char *granted_prefix = "granted-ticket-";
937  const char *last_granted_prefix = "last-granted-";
938  static int granted_prefix_strlen = 0;
939  static int last_granted_prefix_strlen = 0;
940 
941  const char *ticket_id = NULL;
942  const char *is_granted = NULL;
943  const char *last_granted = NULL;
944  const char *sep = NULL;
945 
946  ticket_t *ticket = NULL;
947  pe_working_set_t *data_set = user_data;
948 
949  if (granted_prefix_strlen == 0) {
950  granted_prefix_strlen = strlen(granted_prefix);
951  }
952 
953  if (last_granted_prefix_strlen == 0) {
954  last_granted_prefix_strlen = strlen(last_granted_prefix);
955  }
956 
957  if (strstr(long_key, granted_prefix) == long_key) {
958  ticket_id = long_key + granted_prefix_strlen;
959  if (strlen(ticket_id)) {
960  state_key = strdup("granted");
961  is_granted = value;
962  }
963  } else if (strstr(long_key, last_granted_prefix) == long_key) {
964  ticket_id = long_key + last_granted_prefix_strlen;
965  if (strlen(ticket_id)) {
966  state_key = strdup("last-granted");
967  last_granted = value;
968  }
969  } else if ((sep = strrchr(long_key, '-'))) {
970  ticket_id = sep + 1;
971  state_key = strndup(long_key, strlen(long_key) - strlen(sep));
972  }
973 
974  if (ticket_id == NULL || strlen(ticket_id) == 0) {
975  free(state_key);
976  return;
977  }
978 
979  if (state_key == NULL || strlen(state_key) == 0) {
980  free(state_key);
981  return;
982  }
983 
984  ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
985  if (ticket == NULL) {
986  ticket = ticket_new(ticket_id, data_set);
987  if (ticket == NULL) {
988  free(state_key);
989  return;
990  }
991  }
992 
993  g_hash_table_replace(ticket->state, state_key, strdup(value));
994 
995  if (is_granted) {
996  if (crm_is_true(is_granted)) {
997  ticket->granted = TRUE;
998  crm_info("We have ticket '%s'", ticket->id);
999  } else {
1000  ticket->granted = FALSE;
1001  crm_info("We do not have ticket '%s'", ticket->id);
1002  }
1003 
1004  } else if (last_granted) {
1005  ticket->last_granted = crm_parse_int(last_granted, 0);
1006  }
1007 }
1008 
1009 static void
1010 unpack_handle_remote_attrs(node_t *this_node, xmlNode *state, pe_working_set_t * data_set)
1011 {
1012  const char *resource_discovery_enabled = NULL;
1013  xmlNode *attrs = NULL;
1014  resource_t *rsc = NULL;
1015  const char *shutdown = NULL;
1016 
1017  if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
1018  return;
1019  }
1020 
1021  if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) {
1022  return;
1023  }
1024  crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname);
1025 
1026  this_node->details->remote_maintenance =
1028 
1029  rsc = this_node->details->remote_rsc;
1030  if (this_node->details->remote_requires_reset == FALSE) {
1031  this_node->details->unclean = FALSE;
1032  this_node->details->unseen = FALSE;
1033  }
1034  attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
1035  add_node_attrs(attrs, this_node, TRUE, data_set);
1036 
1037  shutdown = pe_node_attribute_raw(this_node, XML_CIB_ATTR_SHUTDOWN);
1038  if (shutdown != NULL && safe_str_neq("0", shutdown)) {
1039  crm_info("Node %s is shutting down", this_node->details->uname);
1040  this_node->details->shutdown = TRUE;
1041  if (rsc) {
1042  rsc->next_role = RSC_ROLE_STOPPED;
1043  }
1044  }
1045 
1046  if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
1047  crm_info("Node %s is in standby-mode", this_node->details->uname);
1048  this_node->details->standby = TRUE;
1049  }
1050 
1051  if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) ||
1052  (rsc && !is_set(rsc->flags, pe_rsc_managed))) {
1053  crm_info("Node %s is in maintenance-mode", this_node->details->uname);
1054  this_node->details->maintenance = TRUE;
1055  }
1056 
1057  resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
1058  if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
1059  if (is_baremetal_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
1060  crm_warn("ignoring %s attribute on baremetal remote node %s, disabling resource discovery requires stonith to be enabled.",
1062  } else {
1063  /* if we're here, this is either a baremetal node and fencing is enabled,
1064  * or this is a container node which we don't care if fencing is enabled
1065  * or not on. container nodes are 'fenced' by recovering the container resource
1066  * regardless of whether fencing is enabled. */
1067  crm_info("Node %s has resource discovery disabled", this_node->details->uname);
1068  this_node->details->rsc_discovery_enabled = FALSE;
1069  }
1070  }
1071 }
1072 
1073 static bool
1074 unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
1075 {
1076  bool changed = false;
1077  xmlNode *lrm_rsc = NULL;
1078 
1079  for (xmlNode *state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
1080  const char *id = NULL;
1081  const char *uname = NULL;
1082  node_t *this_node = NULL;
1083  bool process = FALSE;
1084 
1085  if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
1086  continue;
1087  }
1088 
1089  id = crm_element_value(state, XML_ATTR_ID);
1090  uname = crm_element_value(state, XML_ATTR_UNAME);
1091  this_node = pe_find_node_any(data_set->nodes, id, uname);
1092 
1093  if (this_node == NULL) {
1094  crm_info("Node %s is unknown", id);
1095  continue;
1096 
1097  } else if (this_node->details->unpacked) {
1098  crm_info("Node %s is already processed", id);
1099  continue;
1100 
1101  } else if (is_remote_node(this_node) == FALSE && is_set(data_set->flags, pe_flag_stonith_enabled)) {
1102  // A redundant test, but preserves the order for regression tests
1103  process = TRUE;
1104 
1105  } else if (is_remote_node(this_node)) {
1106  bool check = FALSE;
1107  resource_t *rsc = this_node->details->remote_rsc;
1108 
1109  if(fence) {
1110  check = TRUE;
1111 
1112  } else if(rsc == NULL) {
1113  /* Not ready yet */
1114 
1115  } else if (is_container_remote_node(this_node)
1116  && rsc->role == RSC_ROLE_STARTED
1117  && rsc->container->role == RSC_ROLE_STARTED) {
1118  /* Both the connection and the underlying container
1119  * need to be known 'up' before we volunterily process
1120  * resources inside it
1121  */
1122  check = TRUE;
1123  crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED);
1124 
1125  } else if (is_container_remote_node(this_node) == FALSE
1126  && rsc->role == RSC_ROLE_STARTED) {
1127  check = TRUE;
1128  crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED);
1129  }
1130 
1131  if (check) {
1132  determine_remote_online_status(data_set, this_node);
1133  unpack_handle_remote_attrs(this_node, state, data_set);
1134  process = TRUE;
1135  }
1136 
1137  } else if (this_node->details->online) {
1138  process = TRUE;
1139 
1140  } else if (fence) {
1141  process = TRUE;
1142  }
1143 
1144  if(process) {
1145  crm_trace("Processing lrm resource entries on %shealthy%s node: %s",
1146  fence?"un":"", is_remote_node(this_node)?" remote":"",
1147  this_node->details->uname);
1148  changed = TRUE;
1149  this_node->details->unpacked = TRUE;
1150 
1151  lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
1152  lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
1153  unpack_lrm_resources(this_node, lrm_rsc, data_set);
1154  }
1155  }
1156  return changed;
1157 }
1158 
1159 /* remove nodes that are down, stopping */
1160 /* create +ve rsc_to_node constraints between resources and the nodes they are running on */
1161 /* anything else? */
1162 gboolean
1163 unpack_status(xmlNode * status, pe_working_set_t * data_set)
1164 {
1165  const char *id = NULL;
1166  const char *uname = NULL;
1167 
1168  xmlNode *state = NULL;
1169  node_t *this_node = NULL;
1170 
1171  crm_trace("Beginning unpack");
1172 
1173  if (data_set->tickets == NULL) {
1174  data_set->tickets =
1175  g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket);
1176  }
1177 
1178  for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
1179  if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) {
1180  xmlNode *xml_tickets = state;
1181  GHashTable *state_hash = NULL;
1182 
1183  /* @COMPAT DC < 1.1.7: Compatibility with the deprecated ticket state section:
1184  * Unpack the attributes in the deprecated "/cib/status/tickets/instance_attributes" if it exists. */
1185  state_hash = crm_str_table_new();
1186 
1187  unpack_instance_attributes(data_set->input, xml_tickets, XML_TAG_ATTR_SETS, NULL,
1188  state_hash, NULL, TRUE, data_set->now);
1189 
1190  g_hash_table_foreach(state_hash, get_ticket_state_legacy, data_set);
1191 
1192  if (state_hash) {
1193  g_hash_table_destroy(state_hash);
1194  }
1195 
1196  /* Unpack the new "/cib/status/tickets/ticket_state"s */
1197  unpack_tickets_state(xml_tickets, data_set);
1198  }
1199 
1200  if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) {
1201  xmlNode *attrs = NULL;
1202  const char *resource_discovery_enabled = NULL;
1203 
1204  id = crm_element_value(state, XML_ATTR_ID);
1205  uname = crm_element_value(state, XML_ATTR_UNAME);
1206  this_node = pe_find_node_any(data_set->nodes, id, uname);
1207 
1208  if (uname == NULL) {
1209  /* error */
1210  continue;
1211 
1212  } else if (this_node == NULL) {
1213  crm_config_warn("Node %s in status section no longer exists", uname);
1214  continue;
1215 
1216  } else if (is_remote_node(this_node)) {
1217  /* online state for remote nodes is determined by the
1218  * rsc state after all the unpacking is done. we do however
1219  * need to mark whether or not the node has been fenced as this plays
1220  * a role during unpacking cluster node resource state */
1221  this_node->details->remote_was_fenced =
1223  continue;
1224  }
1225 
1226  crm_trace("Processing node id=%s, uname=%s", id, uname);
1227 
1228  /* Mark the node as provisionally clean
1229  * - at least we have seen it in the current cluster's lifetime
1230  */
1231  this_node->details->unclean = FALSE;
1232  this_node->details->unseen = FALSE;
1233  attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
1234  add_node_attrs(attrs, this_node, TRUE, data_set);
1235 
1236  if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
1237  crm_info("Node %s is in standby-mode", this_node->details->uname);
1238  this_node->details->standby = TRUE;
1239  }
1240 
1241  if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance"))) {
1242  crm_info("Node %s is in maintenance-mode", this_node->details->uname);
1243  this_node->details->maintenance = TRUE;
1244  }
1245 
1246  resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
1247  if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
1248  crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes",
1250  }
1251 
1252  crm_trace("determining node state");
1253  determine_online_status(state, this_node, data_set);
1254 
1255  if (is_not_set(data_set->flags, pe_flag_have_quorum)
1256  && this_node->details->online
1257  && (data_set->no_quorum_policy == no_quorum_suicide)) {
1258  /* Everything else should flow from this automatically
1259  * At least until the PE becomes able to migrate off healthy resources
1260  */
1261  pe_fence_node(data_set, this_node, "cluster does not have quorum");
1262  }
1263  }
1264  }
1265 
1266 
1267  while(unpack_node_loop(status, FALSE, data_set)) {
1268  crm_trace("Start another loop");
1269  }
1270 
1271  // Now catch any nodes we didn't see
1272  unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set);
1273 
1274  for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
1275  node_t *this_node = gIter->data;
1276 
1277  if (this_node == NULL) {
1278  continue;
1279  } else if(is_remote_node(this_node) == FALSE) {
1280  continue;
1281  } else if(this_node->details->unpacked) {
1282  continue;
1283  }
1284  determine_remote_online_status(data_set, this_node);
1285  }
1286 
1287  return TRUE;
1288 }
1289 
1290 static gboolean
1291 determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state,
1292  node_t * this_node)
1293 {
1294  gboolean online = FALSE;
1295  const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
1296  const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
1297  const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
1298  const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
1299 
1300  if (!crm_is_true(in_cluster)) {
1301  crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster));
1302 
1303  } else if (safe_str_eq(is_peer, ONLINESTATUS)) {
1304  if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
1305  online = TRUE;
1306  } else {
1307  crm_debug("Node is not ready to run resources: %s", join);
1308  }
1309 
1310  } else if (this_node->details->expected_up == FALSE) {
1311  crm_trace("CRMd is down: in_cluster=%s", crm_str(in_cluster));
1312  crm_trace("\tis_peer=%s, join=%s, expected=%s",
1313  crm_str(is_peer), crm_str(join), crm_str(exp_state));
1314 
1315  } else {
1316  /* mark it unclean */
1317  pe_fence_node(data_set, this_node, "peer is unexpectedly down");
1318  crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s",
1319  crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state));
1320  }
1321  return online;
1322 }
1323 
1324 static gboolean
1325 determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state,
1326  node_t * this_node)
1327 {
1328  gboolean online = FALSE;
1329  gboolean do_terminate = FALSE;
1330  const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
1331  const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
1332  const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
1333  const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
1334  const char *terminate = pe_node_attribute_raw(this_node, "terminate");
1335 
1336 /*
1337  - XML_NODE_IN_CLUSTER ::= true|false
1338  - XML_NODE_IS_PEER ::= true|false|online|offline
1339  - XML_NODE_JOIN_STATE ::= member|down|pending|banned
1340  - XML_NODE_EXPECTED ::= member|down
1341 */
1342 
1343  if (crm_is_true(terminate)) {
1344  do_terminate = TRUE;
1345 
1346  } else if (terminate != NULL && strlen(terminate) > 0) {
1347  /* could be a time() value */
1348  char t = terminate[0];
1349 
1350  if (t != '0' && isdigit(t)) {
1351  do_terminate = TRUE;
1352  }
1353  }
1354 
1355  crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d",
1356  this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
1357  crm_str(join), crm_str(exp_state), do_terminate);
1358 
1359  online = crm_is_true(in_cluster);
1360  if (safe_str_eq(is_peer, ONLINESTATUS)) {
1361  is_peer = XML_BOOLEAN_YES;
1362  }
1363  if (exp_state == NULL) {
1364  exp_state = CRMD_JOINSTATE_DOWN;
1365  }
1366 
1367  if (this_node->details->shutdown) {
1368  crm_debug("%s is shutting down", this_node->details->uname);
1369 
1370  /* Slightly different criteria since we can't shut down a dead peer */
1371  online = crm_is_true(is_peer);
1372 
1373  } else if (in_cluster == NULL) {
1374  pe_fence_node(data_set, this_node, "peer has not been seen by the cluster");
1375 
1376  } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) {
1377  pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria");
1378 
1379  } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) {
1380 
1381  if (crm_is_true(in_cluster) || crm_is_true(is_peer)) {
1382  crm_info("- Node %s is not ready to run resources", this_node->details->uname);
1383  this_node->details->standby = TRUE;
1384  this_node->details->pending = TRUE;
1385 
1386  } else {
1387  crm_trace("%s is down or still coming up", this_node->details->uname);
1388  }
1389 
1390  } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN)
1391  && crm_is_true(in_cluster) == FALSE && crm_is_true(is_peer) == FALSE) {
1392  crm_info("Node %s was just shot", this_node->details->uname);
1393  online = FALSE;
1394 
1395  } else if (crm_is_true(in_cluster) == FALSE) {
1396  pe_fence_node(data_set, this_node, "peer is no longer part of the cluster");
1397 
1398  } else if (crm_is_true(is_peer) == FALSE) {
1399  pe_fence_node(data_set, this_node, "peer process is no longer available");
1400 
1401  /* Everything is running at this point, now check join state */
1402  } else if (do_terminate) {
1403  pe_fence_node(data_set, this_node, "termination was requested");
1404 
1405  } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
1406  crm_info("Node %s is active", this_node->details->uname);
1407 
1408  } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING)
1409  || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) {
1410  crm_info("Node %s is not ready to run resources", this_node->details->uname);
1411  this_node->details->standby = TRUE;
1412  this_node->details->pending = TRUE;
1413 
1414  } else {
1415  pe_fence_node(data_set, this_node, "peer was in an unknown state");
1416  crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d",
1417  this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
1418  crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown);
1419  }
1420 
1421  return online;
1422 }
1423 
1424 static gboolean
1425 determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node)
1426 {
1427  resource_t *rsc = this_node->details->remote_rsc;
1428  resource_t *container = NULL;
1429  pe_node_t *host = NULL;
1430 
1431  /* If there is a node state entry for a (former) Pacemaker Remote node
1432  * but no resource creating that node, the node's connection resource will
1433  * be NULL. Consider it an offline remote node in that case.
1434  */
1435  if (rsc == NULL) {
1436  this_node->details->online = FALSE;
1437  goto remote_online_done;
1438  }
1439 
1440  container = rsc->container;
1441 
1442  if (container && (g_list_length(rsc->running_on) == 1)) {
1443  host = rsc->running_on->data;
1444  }
1445 
1446  /* If the resource is currently started, mark it online. */
1447  if (rsc->role == RSC_ROLE_STARTED) {
1448  crm_trace("%s node %s presumed ONLINE because connection resource is started",
1449  (container? "Guest" : "Remote"), this_node->details->id);
1450  this_node->details->online = TRUE;
1451  }
1452 
1453  /* consider this node shutting down if transitioning start->stop */
1454  if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) {
1455  crm_trace("%s node %s shutting down because connection resource is stopping",
1456  (container? "Guest" : "Remote"), this_node->details->id);
1457  this_node->details->shutdown = TRUE;
1458  }
1459 
1460  /* Now check all the failure conditions. */
1461  if(container && is_set(container->flags, pe_rsc_failed)) {
1462  crm_trace("Guest node %s UNCLEAN because guest resource failed",
1463  this_node->details->id);
1464  this_node->details->online = FALSE;
1465  this_node->details->remote_requires_reset = TRUE;
1466 
1467  } else if(is_set(rsc->flags, pe_rsc_failed)) {
1468  crm_trace("%s node %s OFFLINE because connection resource failed",
1469  (container? "Guest" : "Remote"), this_node->details->id);
1470  this_node->details->online = FALSE;
1471 
1472  } else if (rsc->role == RSC_ROLE_STOPPED
1473  || (container && container->role == RSC_ROLE_STOPPED)) {
1474 
1475  crm_trace("%s node %s OFFLINE because its resource is stopped",
1476  (container? "Guest" : "Remote"), this_node->details->id);
1477  this_node->details->online = FALSE;
1478  this_node->details->remote_requires_reset = FALSE;
1479 
1480  } else if (host && (host->details->online == FALSE)
1481  && host->details->unclean) {
1482  crm_trace("Guest node %s UNCLEAN because host is unclean",
1483  this_node->details->id);
1484  this_node->details->online = FALSE;
1485  this_node->details->remote_requires_reset = TRUE;
1486  }
1487 
1488 remote_online_done:
1489  crm_trace("Remote node %s online=%s",
1490  this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
1491  return this_node->details->online;
1492 }
1493 
1494 gboolean
1495 determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set)
1496 {
1497  gboolean online = FALSE;
1498  const char *shutdown = NULL;
1499  const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
1500 
1501  if (this_node == NULL) {
1502  crm_config_err("No node to check");
1503  return online;
1504  }
1505 
1506  this_node->details->shutdown = FALSE;
1507  this_node->details->expected_up = FALSE;
1508  shutdown = pe_node_attribute_raw(this_node, XML_CIB_ATTR_SHUTDOWN);
1509 
1510  if (shutdown != NULL && safe_str_neq("0", shutdown)) {
1511  this_node->details->shutdown = TRUE;
1512 
1513  } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) {
1514  this_node->details->expected_up = TRUE;
1515  }
1516 
1517  if (this_node->details->type == node_ping) {
1518  this_node->details->unclean = FALSE;
1519  online = FALSE; /* As far as resource management is concerned,
1520  * the node is safely offline.
1521  * Anyone caught abusing this logic will be shot
1522  */
1523 
1524  } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
1525  online = determine_online_status_no_fencing(data_set, node_state, this_node);
1526 
1527  } else {
1528  online = determine_online_status_fencing(data_set, node_state, this_node);
1529  }
1530 
1531  if (online) {
1532  this_node->details->online = TRUE;
1533 
1534  } else {
1535  /* remove node from contention */
1536  this_node->fixed = TRUE;
1537  this_node->weight = -INFINITY;
1538  }
1539 
1540  if (online && this_node->details->shutdown) {
1541  /* don't run resources here */
1542  this_node->fixed = TRUE;
1543  this_node->weight = -INFINITY;
1544  }
1545 
1546  if (this_node->details->type == node_ping) {
1547  crm_info("Node %s is not a pacemaker node", this_node->details->uname);
1548 
1549  } else if (this_node->details->unclean) {
1550  pe_proc_warn("Node %s is unclean", this_node->details->uname);
1551 
1552  } else if (this_node->details->online) {
1553  crm_info("Node %s is %s", this_node->details->uname,
1554  this_node->details->shutdown ? "shutting down" :
1555  this_node->details->pending ? "pending" :
1556  this_node->details->standby ? "standby" :
1557  this_node->details->maintenance ? "maintenance" : "online");
1558 
1559  } else {
1560  crm_trace("Node %s is offline", this_node->details->uname);
1561  }
1562 
1563  return online;
1564 }
1565 
/*
 * Find the last character of the base name of a resource ID, i.e. the ID
 * without a trailing ":<digits>" clone instance suffix.
 *
 * id: resource ID, possibly with a numeric instance suffix
 *
 * Returns a pointer into id at the last character of the base name, or NULL
 * if id is NULL or empty. When the ID has no ":<digits>" suffix (or the
 * colon would be its first character), the last character of the whole ID
 * is returned. An ID that merely ends in ':' also yields its last character.
 */
const char *
pe_base_name_end(const char *id)
{
    const char *last = NULL;
    const char *pos = NULL;

    if (crm_strlen_zero(id)) {
        return NULL;
    }

    last = id + strlen(id) - 1;

    /* Step backwards over any trailing digits; the first character of the
     * ID is never examined.
     */
    pos = last;
    while ((pos > id) && (*pos >= '0') && (*pos <= '9')) {
        --pos;
    }

    if ((pos > id) && (*pos == ':')) {
        return (pos == last)? pos : (pos - 1);
    }
    return last;
}
1603 
/*
 * Return a newly allocated copy of a resource ID with any clone instance
 * suffix removed (as located by pe_base_name_end()).
 *
 * last_rsc_id: resource ID; must be non-NULL and non-empty (asserted)
 *
 * The caller is responsible for freeing the result.
 */
char *
clone_strip(const char *last_rsc_id)
{
    char *basename = NULL;
    size_t base_len = 0;
    const char *end = pe_base_name_end(last_rsc_id);

    CRM_ASSERT(end);

    /* end points AT the final base-name character, hence the +1 */
    base_len = end - last_rsc_id + 1;
    basename = strndup(last_rsc_id, base_len);
    CRM_ASSERT(basename);

    return basename;
}
1625 
/*
 * Return a newly allocated copy of a resource ID whose clone instance suffix
 * (if any) has been replaced by ":0".
 *
 * last_rsc_id: resource ID; must be non-NULL and non-empty (asserted)
 *
 * The caller is responsible for freeing the result.
 */
char *
clone_zero(const char *last_rsc_id)
{
    const char *end = pe_base_name_end(last_rsc_id);
    size_t base_name_len = 0;
    char *zero = NULL;

    /* Validate before doing pointer arithmetic: subtracting from a NULL
     * pointer is undefined behaviour, so the length must not be computed
     * until end is known to be valid.
     */
    CRM_ASSERT(end);
    base_name_len = (size_t) (end - last_rsc_id) + 1;

    /* +3: room for ':', '0' and the terminating NUL (calloc zero-fills) */
    zero = calloc(base_name_len + 3, sizeof(char));
    CRM_ASSERT(zero);
    memcpy(zero, last_rsc_id, base_name_len);
    zero[base_name_len] = ':';
    zero[base_name_len + 1] = '0';
    return zero;
}
1651 
1652 static resource_t *
1653 create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set)
1654 {
1655  resource_t *rsc = NULL;
1656  xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
1657 
1658  copy_in_properties(xml_rsc, rsc_entry);
1659  crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
1660  crm_log_xml_debug(xml_rsc, "Orphan resource");
1661 
1662  if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) {
1663  return NULL;
1664  }
1665 
1666  if (xml_contains_remote_node(xml_rsc)) {
1667  node_t *node;
1668 
1669  crm_debug("Detected orphaned remote node %s", rsc_id);
1670  node = pe_find_node(data_set->nodes, rsc_id);
1671  if (node == NULL) {
1672  node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set);
1673  }
1674  link_rsc2remotenode(data_set, rsc);
1675 
1676  if (node) {
1677  crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
1678  node->details->shutdown = TRUE;
1679  }
1680  }
1681 
1682  if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
1683  /* This orphaned rsc needs to be mapped to a container. */
1684  crm_trace("Detected orphaned container filler %s", rsc_id);
1686  }
1687  set_bit(rsc->flags, pe_rsc_orphan);
1688  data_set->resources = g_list_append(data_set->resources, rsc);
1689  return rsc;
1690 }
1691 
1692 extern resource_t *create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set);
1693 
1698 static pe_resource_t *
1699 create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id,
1700  pe_node_t *node, pe_working_set_t *data_set)
1701 {
1702  pe_resource_t *top = create_child_clone(parent, -1, data_set);
1703 
1704  // find_rsc() because we might be a cloned group
1705  pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone);
1706 
1707  pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s",
1708  top->id, parent->id, rsc_id, node->details->uname);
1709  return orphan;
1710 }
1711 
/*
 * Choose the instance of an anonymous clone that a history entry found on a
 * particular node should be associated with.
 *
 * data_set: cluster working set (passed on when an orphan must be created)
 * node:     node whose history is being unpacked
 * parent:   the clone; asserted to be non-NULL, a clone, and not flagged
 *           pe_rsc_unique
 * rsc_id:   resource name to look up inside each instance via find_rsc()
 *
 * Selection order:
 *   1. an instance already known to be active on this node whose matching
 *      resource has no running_on entries yet;
 *   2. otherwise the first inactive, non-blocked instance encountered
 *      (skipped once a multiply-active instance has been detected);
 *   3. otherwise a newly created orphan instance.
 *
 * Returns the chosen resource.
 * NOTE(review): the final trace dereferences the result, so
 * create_anonymous_orphan() is presumably expected never to return NULL
 * here - confirm.
 */
1726 static resource_t *
1727 find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent,
1728  const char *rsc_id)
1729 {
1730  GListPtr rIter = NULL;
1731  pe_resource_t *rsc = NULL;
1732  pe_resource_t *inactive_instance = NULL;
1733  gboolean skip_inactive = FALSE;
1734 
1735  CRM_ASSERT(parent != NULL);
1736  CRM_ASSERT(pe_rsc_is_clone(parent));
1737  CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique));
1738 
1739  // Check for active (or partially active, for cloned groups) instance
1740  pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id);
/* The loop stops as soon as a usable active instance has been found */
1741  for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
1742  GListPtr locations = NULL;
1743  resource_t *child = rIter->data;
1744 
1745  /* Check whether this instance is already known to be active anywhere.
1746  *
1747  * "Active" in this case means known to be active at this stage of
1748  * unpacking. Because this function is called for a resource before the
1749  * resource's individual operation history entries are unpacked,
1750  * locations will generally be NULL.
1751  *
1752  * However, there are three exceptions:
1753  * (1) when child is a cloned group and we have already unpacked the
1754  * history of another member of the group;
1755  * (2) when we've already unpacked the history of another numbered
1756  * instance on the same node (which can happen if globally-unique
1757  * was flipped from true to false); and
1758  * (3) when we re-run calculations on the same data set as part of a
1759  * simulation.
1760  */
1761  child->fns->location(child, &locations, TRUE);
1762  if (locations) {
1763  /* We should never associate the same numbered anonymous clone
1764  * instance with multiple nodes, and clone instances can't migrate,
1765  * so there must be only one location, regardless of history.
1766  */
1767  CRM_LOG_ASSERT(locations->next == NULL);
1768 
/* Nodes are compared by their shared details pointer, not by the node object itself */
1769  if (((pe_node_t *)locations->data)->details == node->details) {
1770  /* This instance is active on the requested node, so check for
1771  * a corresponding configured resource. We use find_rsc()
1772  * because child may be a cloned group, and we need the
1773  * particular member corresponding to rsc_id.
1774  *
1775  * If the history entry is orphaned, rsc will be NULL.
1776  */
1777  rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
1778  if (rsc) {
1779  pe_rsc_trace(parent, "Resource %s, active", rsc->id);
1780 
1781  /* If there are multiple active instances of an anonymous
1782  * clone in a single node's history (which can happen if
1783  * globally-unique is switched from true to false), we want
1784  * to consider the instances beyond the first as orphans.
1785  */
1786  if (rsc->running_on) {
1787  crm_notice("Now-anonymous clone %s has multiple instances active on %s",
1788  parent->id, node->details->uname);
1789  skip_inactive = TRUE;
1790  rsc = NULL;
1791  }
1792  }
1793  }
/* Only the list itself is freed here, not the nodes it points to */
1794  g_list_free(locations);
1795 
1796  } else {
1797  pe_rsc_trace(parent, "Resource %s, skip inactive", child->id);
1798  if (!skip_inactive && !inactive_instance
1799  && is_not_set(child->flags, pe_rsc_block)) {
1800  // Remember one inactive instance in case we don't find active
1801  inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
1802  pe_find_clone);
1803  }
1804  }
1805  }
1806 
/* No active match: fall back to the remembered inactive instance, if allowed */
1807  if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
1808  pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id);
1809  rsc = inactive_instance;
1810  }
1811 
/* Still nothing: the history entry is treated as an orphan */
1812  if (rsc == NULL) {
1813  rsc = create_anonymous_orphan(parent, rsc_id, node, data_set);
1814  pe_rsc_trace(parent, "Resource %s, orphan", rsc->id);
1815  }
1816  return rsc;
1817 }
1818 
1819 static resource_t *
1820 unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id,
1821  xmlNode * rsc_entry)
1822 {
1823  resource_t *rsc = NULL;
1824  resource_t *parent = NULL;
1825 
1826  crm_trace("looking for %s", rsc_id);
1827  rsc = pe_find_resource(data_set->resources, rsc_id);
1828 
1829  if (rsc == NULL) {
1830  /* If we didn't find the resource by its name in the operation history,
1831  * check it again as a clone instance. Even when clone-max=0, we create
1832  * a single :0 orphan to match against here.
1833  */
1834  char *clone0_id = clone_zero(rsc_id);
1835  resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id);
1836 
1837  if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) {
1838  rsc = clone0;
1839  } else {
1840  crm_trace("%s is not known as %s either", rsc_id, clone0_id);
1841  }
1842 
1843  /* Grab the parent clone even if this a different unique instance,
1844  * so we can remember the clone name, which will be the same.
1845  */
1846  parent = uber_parent(clone0);
1847  free(clone0_id);
1848 
1849  crm_trace("%s not found: %s", rsc_id, parent ? parent->id : "orphan");
1850 
1851  } else if (rsc->variant > pe_native) {
1852  crm_trace("%s is no longer a primitive resource, the lrm_resource entry is obsolete",
1853  rsc_id);
1854  return NULL;
1855 
1856  } else {
1857  parent = uber_parent(rsc);
1858  }
1859 
1860  if (pe_rsc_is_anon_clone(parent)) {
1861 
1862  if (pe_rsc_is_bundled(parent)) {
1863  rsc = find_container_child(parent->parent, node);
1864  } else {
1865  char *base = clone_strip(rsc_id);
1866 
1867  rsc = find_anonymous_clone(data_set, node, parent, base);
1868  free(base);
1869  CRM_ASSERT(rsc != NULL);
1870  }
1871  }
1872 
1873  if (rsc && safe_str_neq(rsc_id, rsc->id)
1874  && safe_str_neq(rsc_id, rsc->clone_name)) {
1875 
1876  free(rsc->clone_name);
1877  rsc->clone_name = strdup(rsc_id);
1878  pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
1879  rsc_id, node->details->uname, rsc->id,
1880  (is_set(rsc->flags, pe_rsc_orphan)? " (ORPHAN)" : ""));
1881  }
1882  return rsc;
1883 }
1884 
1885 static resource_t *
1886 process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set)
1887 {
1888  resource_t *rsc = NULL;
1889  const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
1890 
1891  crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname);
1892  rsc = create_fake_resource(rsc_id, rsc_entry, data_set);
1893 
1894  if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
1896 
1897  } else {
1898  print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE);
1899 
1900  CRM_CHECK(rsc != NULL, return NULL);
1901  resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set);
1902  }
1903  return rsc;
1904 }
1905 
1906 static void
1907 process_rsc_state(resource_t * rsc, node_t * node,
1908  enum action_fail_response on_fail,
1909  xmlNode * migrate_op, pe_working_set_t * data_set)
1910 {
1911  node_t *tmpnode = NULL;
1912  char *reason = NULL;
1913 
1914  CRM_ASSERT(rsc);
1915  pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
1916  rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail));
1917 
1918  /* process current state */
1919  if (rsc->role != RSC_ROLE_UNKNOWN) {
1920  resource_t *iter = rsc;
1921 
1922  while (iter) {
1923  if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
1924  node_t *n = node_copy(node);
1925 
1926  pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name,
1927  n->details->uname);
1928  g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
1929  }
1930  if (is_set(iter->flags, pe_rsc_unique)) {
1931  break;
1932  }
1933  iter = iter->parent;
1934  }
1935  }
1936 
1937  /* If a managed resource is believed to be running, but node is down ... */
1938  if (rsc->role > RSC_ROLE_STOPPED
1939  && node->details->online == FALSE
1940  && node->details->maintenance == FALSE
1941  && is_set(rsc->flags, pe_rsc_managed)) {
1942 
1943  gboolean should_fence = FALSE;
1944 
1945  /* If this is a guest node, fence it (regardless of whether fencing is
1946  * enabled, because guest node fencing is done by recovery of the
1947  * container resource rather than by stonithd). Mark the resource
1948  * we're processing as failed. When the guest comes back up, its
1949  * operation history in the CIB will be cleared, freeing the affected
1950  * resource to run again once we are sure we know its state.
1951  */
1952  if (is_container_remote_node(node)) {
1953  set_bit(rsc->flags, pe_rsc_failed);
1954  should_fence = TRUE;
1955 
1956  } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
1957  if (is_baremetal_remote_node(node) && node->details->remote_rsc
1958  && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
1959 
1960  /* setting unseen = true means that fencing of the remote node will
1961  * only occur if the connection resource is not going to start somewhere.
1962  * This allows connection resources on a failed cluster-node to move to
1963  * another node without requiring the baremetal remote nodes to be fenced
1964  * as well. */
1965  node->details->unseen = TRUE;
1966  reason = crm_strdup_printf("%s is active there (fencing will be"
1967  " revoked if remote connection can "
1968  "be re-established elsewhere)",
1969  rsc->id);
1970  }
1971  should_fence = TRUE;
1972  }
1973 
1974  if (should_fence) {
1975  if (reason == NULL) {
1976  reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
1977  }
1978  pe_fence_node(data_set, node, reason);
1979  }
1980  free(reason);
1981  }
1982 
1983  if (node->details->unclean) {
1984  /* No extra processing needed
1985  * Also allows resources to be started again after a node is shot
1986  */
1987  on_fail = action_fail_ignore;
1988  }
1989 
1990  switch (on_fail) {
1991  case action_fail_ignore:
1992  /* nothing to do */
1993  break;
1994 
1995  case action_fail_fence:
1996  /* treat it as if it is still running
1997  * but also mark the node as unclean
1998  */
1999  reason = crm_strdup_printf("%s failed there", rsc->id);
2000  pe_fence_node(data_set, node, reason);
2001  free(reason);
2002  break;
2003 
2004  case action_fail_standby:
2005  node->details->standby = TRUE;
2006  node->details->standby_onfail = TRUE;
2007  break;
2008 
2009  case action_fail_block:
2010  /* is_managed == FALSE will prevent any
2011  * actions being sent for the resource
2012  */
2014  set_bit(rsc->flags, pe_rsc_block);
2015  break;
2016 
2017  case action_fail_migrate:
2018  /* make sure it comes up somewhere else
2019  * or not at all
2020  */
2021  resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set);
2022  break;
2023 
2024  case action_fail_stop:
2025  rsc->next_role = RSC_ROLE_STOPPED;
2026  break;
2027 
2028  case action_fail_recover:
2029  if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
2030  set_bit(rsc->flags, pe_rsc_failed);
2031  stop_action(rsc, node, FALSE);
2032  }
2033  break;
2034 
2036  set_bit(rsc->flags, pe_rsc_failed);
2037 
2038  if (rsc->container) {
2039  stop_action(rsc->container, node, FALSE);
2040  } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
2041  stop_action(rsc, node, FALSE);
2042  }
2043  break;
2044 
2046  set_bit(rsc->flags, pe_rsc_failed);
2047  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
2048  tmpnode = NULL;
2049  if (rsc->is_remote_node) {
2050  tmpnode = pe_find_node(data_set->nodes, rsc->id);
2051  }
2052  if (tmpnode &&
2053  is_baremetal_remote_node(tmpnode) &&
2054  tmpnode->details->remote_was_fenced == 0) {
2055 
2056  /* connection resource to baremetal resource failed in a way that
2057  * should result in fencing the remote-node. */
2058  pe_fence_node(data_set, tmpnode,
2059  "remote connection is unrecoverable");
2060  }
2061  }
2062 
2063  /* require the stop action regardless if fencing is occurring or not. */
2064  if (rsc->role > RSC_ROLE_STOPPED) {
2065  stop_action(rsc, node, FALSE);
2066  }
2067 
2068  /* if reconnect delay is in use, prevent the connection from exiting the
2069  * "STOPPED" role until the failure is cleared by the delay timeout. */
2070  if (rsc->remote_reconnect_interval) {
2071  rsc->next_role = RSC_ROLE_STOPPED;
2072  }
2073  break;
2074  }
2075 
2076  /* ensure a remote-node connection failure forces an unclean remote-node
2077  * to be fenced. By setting unseen = FALSE, the remote-node failure will
2078  * result in a fencing operation regardless if we're going to attempt to
2079  * reconnect to the remote-node in this transition or not. */
2080  if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
2081  tmpnode = pe_find_node(data_set->nodes, rsc->id);
2082  if (tmpnode && tmpnode->details->unclean) {
2083  tmpnode->details->unseen = FALSE;
2084  }
2085  }
2086 
2087  if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
2088  if (is_set(rsc->flags, pe_rsc_orphan)) {
2089  if (is_set(rsc->flags, pe_rsc_managed)) {
2090  crm_config_warn("Detected active orphan %s running on %s",
2091  rsc->id, node->details->uname);
2092  } else {
2093  crm_config_warn("Cluster configured not to stop active orphans."
2094  " %s must be stopped manually on %s",
2095  rsc->id, node->details->uname);
2096  }
2097  }
2098 
2099  native_add_running(rsc, node, data_set);
2100  if (on_fail != action_fail_ignore) {
2101  set_bit(rsc->flags, pe_rsc_failed);
2102  }
2103 
2104  } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
2105  /* Only do this for older status sections that included instance numbers
2106  * Otherwise stopped instances will appear as orphans
2107  */
2108  pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
2109  free(rsc->clone_name);
2110  rsc->clone_name = NULL;
2111 
2112  } else {
2113  char *key = stop_key(rsc);
2114  GListPtr possible_matches = find_actions(rsc->actions, key, node);
2115  GListPtr gIter = possible_matches;
2116 
2117  for (; gIter != NULL; gIter = gIter->next) {
2118  action_t *stop = (action_t *) gIter->data;
2119 
2120  stop->flags |= pe_action_optional;
2121  }
2122 
2123  g_list_free(possible_matches);
2124  free(key);
2125  }
2126 }
2127 
2128 /* create active recurring operations as optional */
2129 static void
2130 process_recurring(node_t * node, resource_t * rsc,
2131  int start_index, int stop_index,
2132  GListPtr sorted_op_list, pe_working_set_t * data_set)
2133 {
2134  int counter = -1;
2135  const char *task = NULL;
2136  const char *status = NULL;
2137  GListPtr gIter = sorted_op_list;
2138 
2139  CRM_ASSERT(rsc);
2140  pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
2141 
2142  for (; gIter != NULL; gIter = gIter->next) {
2143  xmlNode *rsc_op = (xmlNode *) gIter->data;
2144 
2145  int interval = 0;
2146  char *key = NULL;
2147  const char *id = ID(rsc_op);
2148  const char *interval_s = NULL;
2149 
2150  counter++;
2151 
2152  if (node->details->online == FALSE) {
2153  pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname);
2154  break;
2155 
2156  /* Need to check if there's a monitor for role="Stopped" */
2157  } else if (start_index < stop_index && counter <= stop_index) {
2158  pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname);
2159  continue;
2160 
2161  } else if (counter < start_index) {
2162  pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter);
2163  continue;
2164  }
2165 
2166  interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL);
2167  interval = crm_parse_int(interval_s, "0");
2168  if (interval == 0) {
2169  pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname);
2170  continue;
2171  }
2172 
2173  status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2174  if (safe_str_eq(status, "-1")) {
2175  pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname);
2176  continue;
2177  }
2178  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2179  /* create the action */
2180  key = generate_op_key(rsc->id, task, interval);
2181  pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname);
2182  custom_action(rsc, key, task, node, TRUE, TRUE, data_set);
2183  }
2184 }
2185 
2186 void
2187 calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
2188 {
2189  int counter = -1;
2190  int implied_monitor_start = -1;
2191  int implied_master_start = -1;
2192  const char *task = NULL;
2193  const char *status = NULL;
2194  GListPtr gIter = sorted_op_list;
2195 
2196  *stop_index = -1;
2197  *start_index = -1;
2198 
2199  for (; gIter != NULL; gIter = gIter->next) {
2200  xmlNode *rsc_op = (xmlNode *) gIter->data;
2201 
2202  counter++;
2203 
2204  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2205  status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2206 
2207  if (safe_str_eq(task, CRMD_ACTION_STOP)
2208  && safe_str_eq(status, "0")) {
2209  *stop_index = counter;
2210 
2211  } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
2212  *start_index = counter;
2213 
2214  } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
2215  const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
2216 
2217  if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) {
2218  implied_monitor_start = counter;
2219  }
2220  } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
2221  implied_master_start = counter;
2222  }
2223  }
2224 
2225  if (*start_index == -1) {
2226  if (implied_master_start != -1) {
2227  *start_index = implied_master_start;
2228  } else if (implied_monitor_start != -1) {
2229  *start_index = implied_monitor_start;
2230  }
2231  }
2232 }
2233 
2234 static resource_t *
2235 unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
2236 {
2237  GListPtr gIter = NULL;
2238  int stop_index = -1;
2239  int start_index = -1;
2240  enum rsc_role_e req_role = RSC_ROLE_UNKNOWN;
2241 
2242  const char *task = NULL;
2243  const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2244 
2245  resource_t *rsc = NULL;
2246  GListPtr op_list = NULL;
2247  GListPtr sorted_op_list = NULL;
2248 
2249  xmlNode *migrate_op = NULL;
2250  xmlNode *rsc_op = NULL;
2251  xmlNode *last_failure = NULL;
2252 
2253  enum action_fail_response on_fail = FALSE;
2254  enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN;
2255 
2256  crm_trace("[%s] Processing %s on %s",
2257  crm_element_name(rsc_entry), rsc_id, node->details->uname);
2258 
2259  /* extract operations */
2260  op_list = NULL;
2261  sorted_op_list = NULL;
2262 
2263  for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
2264  if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
2265  op_list = g_list_prepend(op_list, rsc_op);
2266  }
2267  }
2268 
2269  if (op_list == NULL) {
2270  /* if there are no operations, there is nothing to do */
2271  return NULL;
2272  }
2273 
2274  /* find the resource */
2275  rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
2276  if (rsc == NULL) {
2277  rsc = process_orphan_resource(rsc_entry, node, data_set);
2278  }
2279  CRM_ASSERT(rsc != NULL);
2280 
2281  /* process operations */
2282  saved_role = rsc->role;
2283  on_fail = action_fail_ignore;
2284  rsc->role = RSC_ROLE_UNKNOWN;
2285  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2286 
2287  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2288  xmlNode *rsc_op = (xmlNode *) gIter->data;
2289 
2290  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2291  if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
2292  migrate_op = rsc_op;
2293  }
2294 
2295  unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set);
2296  }
2297 
2298  /* create active recurring operations as optional */
2299  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2300  process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set);
2301 
2302  /* no need to free the contents */
2303  g_list_free(sorted_op_list);
2304 
2305  process_rsc_state(rsc, node, on_fail, migrate_op, data_set);
2306 
2307  if (get_target_role(rsc, &req_role)) {
2308  if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) {
2309  pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s"
2310  " with requested next role %s",
2311  rsc->id, role2text(rsc->next_role), role2text(req_role));
2312  rsc->next_role = req_role;
2313 
2314  } else if (req_role > rsc->next_role) {
2315  pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
2316  " with requested next role %s",
2317  rsc->id, role2text(rsc->next_role), role2text(req_role));
2318  }
2319  }
2320 
2321  if (saved_role > rsc->role) {
2322  rsc->role = saved_role;
2323  }
2324 
2325  return rsc;
2326 }
2327 
2328 static void
2329 handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
2330 {
2331  xmlNode *rsc_entry = NULL;
2332  for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL;
2333  rsc_entry = __xml_next_element(rsc_entry)) {
2334 
2335  resource_t *rsc;
2336  resource_t *container;
2337  const char *rsc_id;
2338  const char *container_id;
2339 
2340  if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) {
2341  continue;
2342  }
2343 
2344  container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
2345  rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2346  if (container_id == NULL || rsc_id == NULL) {
2347  continue;
2348  }
2349 
2350  container = pe_find_resource(data_set->resources, container_id);
2351  if (container == NULL) {
2352  continue;
2353  }
2354 
2355  rsc = pe_find_resource(data_set->resources, rsc_id);
2356  if (rsc == NULL ||
2357  is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE ||
2358  rsc->container != NULL) {
2359  continue;
2360  }
2361 
2362  pe_rsc_trace(rsc, "Mapped orphaned rsc %s's container to %s", rsc->id, container_id);
2363  rsc->container = container;
2364  container->fillers = g_list_append(container->fillers, rsc);
2365  }
2366 }
2367 
2368 gboolean
2369 unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
2370 {
2371  xmlNode *rsc_entry = NULL;
2372  gboolean found_orphaned_container_filler = FALSE;
2373 
2374  CRM_CHECK(node != NULL, return FALSE);
2375 
2376  crm_trace("Unpacking resources on %s", node->details->uname);
2377 
2378  for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL;
2379  rsc_entry = __xml_next_element(rsc_entry)) {
2380 
2381  if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
2382  resource_t *rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set);
2383  if (!rsc) {
2384  continue;
2385  }
2386  if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) {
2387  found_orphaned_container_filler = TRUE;
2388  }
2389  }
2390  }
2391 
2392  /* now that all the resource state has been unpacked for this node
2393  * we have to go back and map any orphaned container fillers to their
2394  * container resource */
2395  if (found_orphaned_container_filler) {
2396  handle_orphaned_container_fillers(lrm_rsc_list, data_set);
2397  }
2398  return TRUE;
2399 }
2400 
2401 static void
2402 set_active(resource_t * rsc)
2403 {
2404  resource_t *top = uber_parent(rsc);
2405 
2406  if (top && top->variant == pe_master) {
2407  rsc->role = RSC_ROLE_SLAVE;
2408  } else {
2409  rsc->role = RSC_ROLE_STARTED;
2410  }
2411 }
2412 
2413 static void
2414 set_node_score(gpointer key, gpointer value, gpointer user_data)
2415 {
2416  node_t *node = value;
2417  int *score = user_data;
2418 
2419  node->weight = *score;
2420 }
2421 
2422 #define STATUS_PATH_MAX 1024
2423 static xmlNode *
2424 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2425  pe_working_set_t * data_set)
2426 {
2427  int offset = 0;
2428  char xpath[STATUS_PATH_MAX];
2429 
2430  offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node);
2431  offset +=
2432  snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']",
2433  resource);
2434 
2435  /* Need to check against transition_magic too? */
2436  if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) {
2437  offset +=
2438  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2439  "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op,
2440  source);
2441  } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) {
2442  offset +=
2443  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2444  "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op,
2445  source);
2446  } else {
2447  offset +=
2448  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2449  "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op);
2450  }
2451 
2452  CRM_LOG_ASSERT(offset > 0);
2453  return get_xpath_object(xpath, data_set->input, LOG_DEBUG);
2454 }
2455 
2456 static bool
2457 stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
2458  pe_working_set_t *data_set)
2459 {
2460  xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id,
2461  NULL, data_set);
2462 
2463  if (stop_op) {
2464  int stop_id = 0;
2465  int task_id = 0;
2466 
2467  crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
2468  crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
2469  if (stop_id > task_id) {
2470  return TRUE;
2471  }
2472  }
2473  return FALSE;
2474 }
2475 
2476 static void
2477 unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set)
2478 {
2479  /* A successful migration sequence is:
2480  * migrate_to on source node
2481  * migrate_from on target node
2482  * stop on source node
2483  *
2484  * If a migrate_to is followed by a stop, the entire migration (successful
2485  * or failed) is complete, and we don't care what happened on the target.
2486  *
2487  * If no migrate_from has happened, the migration is considered to be
2488  * "partial". If the migrate_from failed, make sure the resource gets
2489  * stopped on both source and target (if up).
2490  *
2491  * If the migrate_to and migrate_from both succeeded (which also implies the
2492  * resource is no longer running on the source), but there is no stop, the
2493  * migration is considered to be "dangling".
2494  */
2495  int from_rc = 0;
2496  int from_status = 0;
2497  const char *migrate_source = NULL;
2498  const char *migrate_target = NULL;
2499  pe_node_t *target = NULL;
2500  pe_node_t *source = NULL;
2501  xmlNode *migrate_from = NULL;
2502 
2503  if (stop_happened_after(rsc, node, xml_op, data_set)) {
2504  return;
2505  }
2506 
2507  // Clones are not allowed to migrate, so role can't be master
2508  rsc->role = RSC_ROLE_STARTED;
2509 
2510  migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
2511  migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
2512 
2513  target = pe_find_node(data_set->nodes, migrate_target);
2514  source = pe_find_node(data_set->nodes, migrate_source);
2515 
2516  // Check whether there was a migrate_from action
2517  migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target,
2518  migrate_source, data_set);
2519  if (migrate_from) {
2520  crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
2521  crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
2522  pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d",
2523  ID(migrate_from), migrate_target, from_status, from_rc);
2524  }
2525 
2526  if (migrate_from && from_rc == PCMK_OCF_OK
2527  && from_status == PCMK_LRM_OP_DONE) {
2528  /* The migrate_to and migrate_from both succeeded, so mark the migration
2529  * as "dangling". This will be used to schedule a stop action on the
2530  * source without affecting the target.
2531  */
2532  pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op),
2533  migrate_source);
2534  rsc->role = RSC_ROLE_STOPPED;
2535  rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
2536 
2537  } else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed
2538  if (target && target->details->online) {
2539  pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
2540  target->details->online);
2541  native_add_running(rsc, target, data_set);
2542  }
2543 
2544  } else { // Pending, or complete but erased
2545  if (target && target->details->online) {
2546  pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
2547  target->details->online);
2548 
2549  native_add_running(rsc, target, data_set);
2550  if (source && source->details->online) {
2551  /* This is a partial migration: the migrate_to completed
2552  * successfully on the source, but the migrate_from has not
2553  * completed. Remember the source and target; if the newly
2554  * chosen target remains the same when we schedule actions
2555  * later, we may continue with the migration.
2556  */
2557  rsc->partial_migration_target = target;
2558  rsc->partial_migration_source = source;
2559  }
2560  } else {
2561  /* Consider it failed here - forces a restart, prevents migration */
2562  set_bit(rsc->flags, pe_rsc_failed);
2564  }
2565  }
2566 }
2567 
2568 static void
2569 unpack_rsc_migration_failure(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set)
2570 {
2571  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
2572 
2573  CRM_ASSERT(rsc);
2574  if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
2575  int stop_id = 0;
2576  int migrate_id = 0;
2577  const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
2578  const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
2579 
2580  xmlNode *stop_op =
2581  find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_source, NULL, data_set);
2582  xmlNode *migrate_op =
2583  find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, migrate_source, migrate_target,
2584  data_set);
2585 
2586  if (stop_op) {
2587  crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
2588  }
2589  if (migrate_op) {
2590  crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
2591  }
2592 
2593  /* Get our state right */
2594  rsc->role = RSC_ROLE_STARTED; /* can be master? */
2595 
2596  if (stop_op == NULL || stop_id < migrate_id) {
2597  node_t *source = pe_find_node(data_set->nodes, migrate_source);
2598 
2599  if (source && source->details->online) {
2600  native_add_running(rsc, source, data_set);
2601  }
2602  }
2603 
2604  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
2605  int stop_id = 0;
2606  int migrate_id = 0;
2607  const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
2608  const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
2609 
2610  xmlNode *stop_op =
2611  find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_target, NULL, data_set);
2612  xmlNode *migrate_op =
2613  find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source,
2614  data_set);
2615 
2616  if (stop_op) {
2617  crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
2618  }
2619  if (migrate_op) {
2620  crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
2621  }
2622 
2623  /* Get our state right */
2624  rsc->role = RSC_ROLE_STARTED; /* can be master? */
2625 
2626  if (stop_op == NULL || stop_id < migrate_id) {
2627  node_t *target = pe_find_node(data_set->nodes, migrate_target);
2628 
2629  pe_rsc_trace(rsc, "Stop: %p %d, Migrated: %p %d", stop_op, stop_id, migrate_op,
2630  migrate_id);
2631  if (target && target->details->online) {
2632  native_add_running(rsc, target, data_set);
2633  }
2634 
2635  } else if (migrate_op == NULL) {
2636  /* Make sure it gets cleaned up, the stop may pre-date the migrate_from */
2637  rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
2638  }
2639  }
2640 }
2641 
2642 static void
2643 record_failed_op(xmlNode *op, node_t* node, resource_t *rsc, pe_working_set_t * data_set)
2644 {
2645  xmlNode *xIter = NULL;
2646  const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY);
2647 
2648  if (node->details->online == FALSE) {
2649  return;
2650  }
2651 
2652  for (xIter = data_set->failed->children; xIter; xIter = xIter->next) {
2653  const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY);
2654  const char *uname = crm_element_value(xIter, XML_ATTR_UNAME);
2655 
2656  if(safe_str_eq(op_key, key) && safe_str_eq(uname, node->details->uname)) {
2657  crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname);
2658  return;
2659  }
2660  }
2661 
2662  crm_trace("Adding entry %s on %s", op_key, node->details->uname);
2663  crm_xml_add(op, XML_ATTR_UNAME, node->details->uname);
2664  crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id);
2665  add_node_copy(data_set->failed, op);
2666 }
2667 
2668 static const char *get_op_key(xmlNode *xml_op)
2669 {
2670  const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
2671  if(key == NULL) {
2672  key = ID(xml_op);
2673  }
2674  return key;
2675 }
2676 
2677 static void
2678 unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure,
2679  enum action_fail_response * on_fail, pe_working_set_t * data_set)
2680 {
2681  int interval = 0;
2682  bool is_probe = FALSE;
2683  action_t *action = NULL;
2684 
2685  const char *key = get_op_key(xml_op);
2686  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
2687  const char *op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
2688 
2689  CRM_ASSERT(rsc);
2690 
2691  *last_failure = xml_op;
2692 
2693  crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
2694  if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
2695  is_probe = TRUE;
2696  pe_rsc_trace(rsc, "is a probe: %s", key);
2697  }
2698 
2699  if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
2700  crm_warn("Processing failed %s of %s on %s: %s " CRM_XS " rc=%d",
2701  (is_probe? "probe" : task), rsc->id, node->details->uname,
2702  services_ocf_exitcode_str(rc), rc);
2703 
2704  if (is_probe && (rc != PCMK_OCF_OK)
2705  && (rc != PCMK_OCF_NOT_RUNNING)
2706  && (rc != PCMK_OCF_RUNNING_MASTER)) {
2707 
2708  /* A failed (not just unexpected) probe result could mean the user
2709  * didn't know resources will be probed even where they can't run.
2710  */
2711  crm_notice("If it is not possible for %s to run on %s, see "
2712  "the resource-discovery option for location constraints",
2713  rsc->id, node->details->uname);
2714  }
2715 
2716  record_failed_op(xml_op, node, rsc, data_set);
2717 
2718  } else {
2719  crm_trace("Processing failed op %s for %s on %s: %s (%d)",
2720  task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc),
2721  rc);
2722  }
2723 
2724  action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
2725  if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) ||
2726  (action->on_fail == action_fail_reset_remote && *on_fail <= action_fail_recover) ||
2727  (action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) ||
2728  (*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) {
2729  pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail),
2730  fail2text(action->on_fail), action->uuid, key);
2731  *on_fail = action->on_fail;
2732  }
2733 
2734  if (safe_str_eq(task, CRMD_ACTION_STOP)) {
2735  resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set);
2736 
2737  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
2738  unpack_rsc_migration_failure(rsc, node, xml_op, data_set);
2739 
2740  } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
2741  rsc->role = RSC_ROLE_MASTER;
2742 
2743  } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
2744  if (action->on_fail == action_fail_block) {
2745  rsc->role = RSC_ROLE_MASTER;
2746  rsc->next_role = RSC_ROLE_STOPPED;
2747 
2748  } else if(rc == PCMK_OCF_NOT_RUNNING) {
2749  rsc->role = RSC_ROLE_STOPPED;
2750 
2751  } else {
2752  /*
2753  * Staying in master role would put the PE/TE into a loop. Setting
2754  * slave role is not dangerous because the resource will be stopped
2755  * as part of recovery, and any master promotion will be ordered
2756  * after that stop.
2757  */
2758  rsc->role = RSC_ROLE_SLAVE;
2759  }
2760 
2761  } else if (compare_version("2.0", op_version) > 0 && safe_str_eq(task, CRMD_ACTION_START)) {
2762  crm_warn("Compatibility handling for failed op %s on %s", key, node->details->uname);
2763  resource_location(rsc, node, -INFINITY, "__legacy_start__", data_set);
2764  }
2765 
2766  if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) {
2767  /* leave stopped */
2768  pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id);
2769  rsc->role = RSC_ROLE_STOPPED;
2770 
2771  } else if (rsc->role < RSC_ROLE_STARTED) {
2772  pe_rsc_trace(rsc, "Setting %s active", rsc->id);
2773  set_active(rsc);
2774  }
2775 
2776  pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
2777  rsc->id, role2text(rsc->role),
2778  node->details->unclean ? "true" : "false",
2779  fail2text(action->on_fail), role2text(action->fail_role));
2780 
2781  if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) {
2782  rsc->next_role = action->fail_role;
2783  }
2784 
2785  if (action->fail_role == RSC_ROLE_STOPPED) {
2786  int score = -INFINITY;
2787 
2788  resource_t *fail_rsc = rsc;
2789 
2790  if (fail_rsc->parent) {
2791  resource_t *parent = uber_parent(fail_rsc);
2792 
2793  if (pe_rsc_is_clone(parent)
2794  && is_not_set(parent->flags, pe_rsc_unique)) {
2795  /* for clone and master resources, if a child fails on an operation
2796  * with on-fail = stop, all the resources fail. Do this by preventing
2797  * the parent from coming up again. */
2798  fail_rsc = parent;
2799  }
2800  }
2801  crm_warn("Making sure %s doesn't come up again", fail_rsc->id);
2802  /* make sure it doesn't come up again */
2803  if (fail_rsc->allowed_nodes != NULL) {
2804  g_hash_table_destroy(fail_rsc->allowed_nodes);
2805  }
2806  fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes);
2807  g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
2808  }
2809 
2810  pe_free_action(action);
2811 }
2812 
/*
 * Translate the return code of a recorded operation into an operation status.
 *
 * The result starts as PCMK_LRM_OP_DONE, becomes PCMK_LRM_OP_ERROR when a
 * non-negative target_rc differs from rc, and is then refined in a switch on
 * rc. As side effects rsc->role, rsc->next_role, rsc->flags and *on_fail may
 * be modified.
 *
 * rsc        resource the operation belongs to (asserted non-NULL)
 * rc         return code recorded for the operation
 * target_rc  expected return code; a negative value disables the mismatch check
 * node       node the operation ran on (log messages, pe_can_fence() check)
 * xml_op     operation history entry; task and interval are read from it
 * on_fail    in/out failure policy, reset to action_fail_ignore in the
 *            PCMK_OCF_NOT_RUNNING branch
 * data_set   working set, handed to pe_can_fence()
 *
 * Returns the computed status value.
 *
 * NOTE(review): the embedded listing numbers skip 2872, 2896-2897, 2902,
 * 2906-2909 and 2919, so this copy appears to have lost several case labels
 * (and one statement) inside the switch. As written, the code following the
 * affected "break;" statements has no label. Restore the missing lines from
 * the upstream file before relying on this listing.
 */
2813 static int
2814 determine_op_status(
2815  resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set)
2816 {
2817  int interval = 0;
2818  int result = PCMK_LRM_OP_DONE;
2819 
2820  const char *key = get_op_key(xml_op);
2821  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
2822 
2823  bool is_probe = FALSE;
2824 
2825  CRM_ASSERT(rsc);
2826  crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
 /* A probe is a status operation with a zero interval */
2827  if (interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
2828  is_probe = TRUE;
2829  }
2830 
 /* Any mismatch with a known expected code is provisionally an error */
2831  if (target_rc >= 0 && target_rc != rc) {
2832  result = PCMK_LRM_OP_ERROR;
2833  pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
2834  key, node->details->uname,
2835  services_ocf_exitcode_str(rc), rc,
2836  services_ocf_exitcode_str(target_rc), target_rc);
2837  }
2838 
2839  /* we could clean this up significantly except for old LRMs and CRMs that
2840  * didn't include target_rc and liked to remap status
2841  */
2842  switch (rc) {
2843  case PCMK_OCF_OK:
 /* NOTE(review): 7 is presumably the numeric value of PCMK_OCF_NOT_RUNNING -- confirm */
2844  if (is_probe && target_rc == 7) {
2845  result = PCMK_LRM_OP_DONE;
2846  pe_rsc_info(rsc, "Operation %s found resource %s active on %s",
2847  task, rsc->id, node->details->uname);
2848 
2849  /* legacy code for pre-0.6.5 operations */
2850  } else if (target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) {
2851  /* catch status ops that return 0 instead of 8 while they
2852  * are supposed to be in master mode
2853  */
2854  result = PCMK_LRM_OP_ERROR;
2855  }
2856  break;
2857 
2858  case PCMK_OCF_NOT_RUNNING:
 /* "Not running" is an acceptable outcome for probes, for operations that
  * expected it, and for unmanaged resources */
2859  if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) {
2860  result = PCMK_LRM_OP_DONE;
2861  rsc->role = RSC_ROLE_STOPPED;
2862 
2863  /* clear any previous failure actions */
2864  *on_fail = action_fail_ignore;
2865  rsc->next_role = RSC_ROLE_UNKNOWN;
2866 
2867  } else if (safe_str_neq(task, CRMD_ACTION_STOP)) {
2868  result = PCMK_LRM_OP_ERROR;
2869  }
2870  break;
2871 
 /* NOTE(review): listing line 2872 is missing; presumably the case label for
  * the "running in master mode" return code -- confirm upstream */
2873  if (is_probe) {
2874  result = PCMK_LRM_OP_DONE;
2875  pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s",
2876  task, rsc->id, node->details->uname);
2877 
2878  } else if (target_rc == rc) {
2879  /* nothing to do */
2880 
2881  } else if (target_rc >= 0) {
2882  result = PCMK_LRM_OP_ERROR;
2883 
2884  /* legacy code for pre-0.6.5 operations */
2885  } else if (safe_str_neq(task, CRMD_ACTION_STATUS)
2886  || rsc->role != RSC_ROLE_MASTER) {
2887  result = PCMK_LRM_OP_ERROR;
2888  if (rsc->role != RSC_ROLE_MASTER) {
2889  crm_err("%s reported %s in master mode on %s",
2890  key, rsc->id, node->details->uname);
2891  }
2892  }
2893  rsc->role = RSC_ROLE_MASTER;
2894  break;
2895 
 /* NOTE(review): listing lines 2896-2897 are missing; presumably one or two
  * case labels for master-related failure codes -- confirm upstream */
2898  rsc->role = RSC_ROLE_MASTER;
2899  result = PCMK_LRM_OP_ERROR;
2900  break;
2901 
 /* NOTE(review): listing line 2902 is missing; presumably the case label for
  * the return code that is treated as fatal -- confirm upstream */
2903  result = PCMK_LRM_OP_ERROR_FATAL;
2904  break;
2905 
 /* NOTE(review): listing lines 2906-2909 are missing; presumably a group of
  * case labels that includes PCMK_OCF_UNIMPLEMENT_FEATURE (tested below) --
  * confirm upstream */
2910  if (rc == PCMK_OCF_UNIMPLEMENT_FEATURE && interval > 0) {
2911  result = PCMK_LRM_OP_NOTSUPPORTED;
2912  break;
2913 
2914  } else if (pe_can_fence(data_set, node) == FALSE
2915  && safe_str_eq(task, CRMD_ACTION_STOP)) {
2916  /* If a stop fails and we can't fence, there's nothing else we can do */
2917  pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)",
2918  rsc->id, task, services_ocf_exitcode_str(rc), rc);
 /* NOTE(review): listing line 2919 is missing here -- confirm upstream */
2920  set_bit(rsc->flags, pe_rsc_block);
2921  }
2922  result = PCMK_LRM_OP_ERROR_HARD;
2923  break;
2924 
2925  default:
 /* Unrecognised codes are errors unless an error was already recorded above */
2926  if (result == PCMK_LRM_OP_DONE) {
2927  crm_info("Treating %s (rc=%d) on %s as an ERROR",
2928  key, rc, node->details->uname);
2929  result = PCMK_LRM_OP_ERROR;
2930  }
2931  }
2932 
2933  return result;
2934 }
2935 
/*
 * Decide whether a recorded operation result should be treated as expired,
 * and schedule a CRM_OP_CLEAR_FAILCOUNT action when a reason to clear the
 * failure is found.
 *
 * rsc       resource the operation belongs to (failure_timeout and
 *           remote_reconnect_interval are read from it)
 * node      node the operation ran on
 * rc        recorded return code; only consulted for the probe exemption at the end
 * xml_op    operation history entry
 * data_set  working set
 *
 * Returns TRUE when the operation is considered expired, FALSE otherwise.
 *
 * NOTE(review): the embedded listing numbers skip 2949, 3030, 3036, 3052 and
 * 3054. In particular the first "if" below ends in "&&" with its second
 * operand missing, so this copy is not syntactically complete. Restore the
 * missing lines from the upstream file.
 */
2936 static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set)
2937 {
2938  bool expired = FALSE;
2939  time_t last_failure = 0;
2940  int interval = 0;
2941  int failure_timeout = rsc->failure_timeout;
2942  const char *key = get_op_key(xml_op);
2943  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
2944  const char *clear_reason = NULL;
2945 
2946  /* clearing recurring monitor operation failures automatically
2947  * needs to be carefully considered */
2948  if (safe_str_eq(crm_element_value(xml_op, XML_LRM_ATTR_TASK), "monitor") &&
 /* NOTE(review): listing line 2949 (the rest of this condition and the
  * opening brace) is missing; given the comment above it presumably tests for
  * a recurring (non-zero interval) operation -- confirm upstream */
2950 
2951  /* TODO, in the future we should consider not clearing recurring monitor
2952  * op failures unless the last action for a resource was a "stop" action.
2953  * otherwise it is possible that clearing the monitor failure will result
2954  * in the resource being in an undeterministic state.
2955  *
2956  * For now we handle this potential undeterministic condition for remote
2957  * node connection resources by not clearing a recurring monitor op failure
2958  * until after the node has been fenced. */
2959 
2960  if (is_set(data_set->flags, pe_flag_stonith_enabled) &&
2961  (rsc->remote_reconnect_interval)) {
2962 
2963  node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
2964  if (remote_node && remote_node->details->remote_was_fenced == 0) {
2965  if (strstr(ID(xml_op), "last_failure")) {
2966  crm_info("Waiting to clear monitor failure for remote node %s until fencing has occurred", rsc->id);
2967  }
2968  /* disabling failure timeout for this operation because we believe
2969  * fencing of the remote node should occur first. */
2970  failure_timeout = 0;
2971  }
2972  }
2973  }
2974 
 /* With a timeout in effect, the entry expires once the effective time has
  * passed last-change + timeout */
2975  if (failure_timeout > 0) {
2976  int last_run = 0;
2977 
2978  if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) {
2979  time_t now = get_effective_time(data_set);
2980 
2981  if (now > (last_run + failure_timeout)) {
2982  expired = TRUE;
2983  }
2984  }
2985  }
2986 
2987  if (expired) {
2988  if (failure_timeout > 0) {
 /* A fail count exists: only clear it when the effective count is zero,
  * otherwise the entry is not treated as expired after all */
2989  if (pe_get_failcount(node, rsc, &last_failure, pe_fc_default,
2990  xml_op, data_set)) {
2991 
2992  if (pe_get_failcount(node, rsc, &last_failure, pe_fc_effective,
2993  xml_op, data_set) == 0) {
2994  clear_reason = "it expired";
2995  } else {
2996  expired = FALSE;
2997  }
2998 
2999  } else if (rsc->remote_reconnect_interval && strstr(ID(xml_op), "last_failure")) {
3000  /* always clear last failure when reconnect interval is set */
3001  clear_reason = "reconnect interval is set";
3002  }
3003  }
3004 
 /* Not expired: a last_failure entry for start/monitor is still cleared when
  * the digest comparison reports something other than a match */
3005  } else if (strstr(ID(xml_op), "last_failure") &&
3006  ((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) {
3007 
3008  op_digest_cache_t *digest_data = NULL;
3009 
3010  digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set);
3011 
3012  if (digest_data->rc == RSC_DIGEST_UNKNOWN) {
3013  crm_trace("rsc op %s/%s on node %s does not have a op digest to compare against", rsc->id,
3014  key, node->details->id);
3015  } else if(container_fix_remote_addr(rsc) && digest_data->rc != RSC_DIGEST_MATCH) {
3016  // We can't sanely check the changing 'addr' attribute. Yet
3017  crm_trace("Ignoring rsc op %s/%s on node %s", rsc->id, key, node->details->id);
3018 
3019  } else if (digest_data->rc != RSC_DIGEST_MATCH) {
3020  clear_reason = "resource parameters have changed";
3021  }
3022  }
3023 
3024  if (clear_reason != NULL) {
3025  node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
 /* Note: this "key" shadows the function-level "key" declared above */
3026  char *key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0);
3027  action_t *clear_op = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT,
3028  node, FALSE, TRUE, data_set);
3029 
 /* NOTE(review): listing line 3030 is missing here -- confirm upstream */
3031 
3032  crm_notice("Clearing failure of %s on %s because %s " CRM_XS " %s",
3033  rsc->id, node->details->uname, clear_reason, clear_op->uuid);
3034 
3035  if (is_set(data_set->flags, pe_flag_stonith_enabled)
 /* NOTE(review): listing line 3036 (one operand of this condition) is
  * missing -- confirm upstream */
3037  && remote_node
3038  && remote_node->details->unclean) {
3039 
 /* Order the clearing action after the fencing of the unclean remote node */
3040  action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, data_set);
3041  crm_notice("Waiting for %s to complete before clearing %s failure for remote node %s", fence?fence->uuid:"nil", task, rsc->id);
3042 
3043  order_actions(fence, clear_op, pe_order_implies_then);
3044  }
3045  }
3046 
3047  crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
3048  if(expired && interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
3049  switch(rc) {
3050  case PCMK_OCF_OK:
3051  case PCMK_OCF_NOT_RUNNING:
 /* NOTE(review): listing lines 3052 and 3054 are missing; presumably two more
  * case labels in this list -- confirm upstream */
3053  case PCMK_OCF_DEGRADED:
3055  /* Don't expire probes that return these values */
3056  expired = FALSE;
3057  break;
3058  }
3059  }
3060 
3061  return expired;
3062 }
3063 
3064 int get_target_rc(xmlNode *xml_op)
3065 {
3066  int dummy = 0;
3067  int target_rc = 0;
3068  char *dummy_string = NULL;
3069  const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
3070  if (key == NULL) {
3071  return -1;
3072  }
3073 
3074  decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc);
3075  free(dummy_string);
3076 
3077  return target_rc;
3078 }
3079 
3080 static enum action_fail_response
3081 get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set)
3082 {
3083  int result = action_fail_recover;
3084  action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
3085 
3086  result = action->on_fail;
3087  pe_free_action(action);
3088 
3089  return result;
3090 }
3091 
/*
 * Update a resource's role from an operation that is being treated as
 * successful, and possibly reset a previously recorded failure policy.
 *
 * rsc           resource to update (asserted non-NULL)
 * node          node the operation ran on
 * xml_op        operation history entry (asserted non-NULL)
 * task          operation name
 * rc            return code to act on
 * last_failure  most recent failed operation entry, or NULL
 * on_fail       in/out failure policy; may be reset to action_fail_ignore
 * data_set      working set, passed through to unpack_rsc_migration()
 *
 * NOTE(review): the embedded listing numbers skip 3163 and 3167 inside the
 * on_fail switch, so two case labels appear to be missing from this copy.
 * Restore them from the upstream file.
 */
3092 static void
3093 update_resource_state(resource_t * rsc, node_t * node, xmlNode * xml_op, const char * task, int rc,
3094  xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set)
3095 {
3096  gboolean clear_past_failure = FALSE;
3097 
3098  CRM_ASSERT(rsc);
3099  CRM_ASSERT(xml_op);
3100 
3101  if (rc == PCMK_OCF_NOT_RUNNING) {
3102  clear_past_failure = TRUE;
3103 
3104  } else if (rc == PCMK_OCF_NOT_INSTALLED) {
3105  rsc->role = RSC_ROLE_STOPPED;
3106 
3107  } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
 /* A status result only clears the past failure when it has the same
  * operation key as the recorded last failure */
3108  if (last_failure) {
3109  const char *op_key = get_op_key(xml_op);
3110  const char *last_failure_key = get_op_key(last_failure);
3111 
3112  if (safe_str_eq(op_key, last_failure_key)) {
3113  clear_past_failure = TRUE;
3114  }
3115  }
3116 
3117  if (rsc->role < RSC_ROLE_STARTED) {
3118  set_active(rsc);
3119  }
3120 
3121  } else if (safe_str_eq(task, CRMD_ACTION_START)) {
3122  rsc->role = RSC_ROLE_STARTED;
3123  clear_past_failure = TRUE;
3124 
3125  } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
3126  rsc->role = RSC_ROLE_STOPPED;
3127  clear_past_failure = TRUE;
3128 
3129  } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
3130  rsc->role = RSC_ROLE_MASTER;
3131  clear_past_failure = TRUE;
3132 
3133  } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
3134  /* Demote from Master does not clear an error */
3135  rsc->role = RSC_ROLE_SLAVE;
3136 
3137  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
3138  rsc->role = RSC_ROLE_STARTED;
3139  clear_past_failure = TRUE;
3140 
3141  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
3142  unpack_rsc_migration(rsc, node, xml_op, data_set);
3143 
3144  } else if (rsc->role < RSC_ROLE_STARTED) {
3145  pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname);
3146  set_active(rsc);
3147  }
3148 
3149  /* clear any previous failure actions */
3150  if (clear_past_failure) {
3151  switch (*on_fail) {
 /* These policies are left in place */
3152  case action_fail_stop:
3153  case action_fail_fence:
3154  case action_fail_migrate:
3155  case action_fail_standby:
3156  pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop",
3157  rsc->id, fail2text(*on_fail));
3158  break;
3159 
3160  case action_fail_block:
3161  case action_fail_ignore:
3162  case action_fail_recover:
 /* NOTE(review): listing line 3163 is missing; presumably one more case
  * label in this group -- confirm upstream */
3164  *on_fail = action_fail_ignore;
3165  rsc->next_role = RSC_ROLE_UNKNOWN;
3166  break;
 /* NOTE(review): listing line 3167 is missing; presumably the case label for
  * the remote-connection policy handled below -- confirm upstream */
3168  if (rsc->remote_reconnect_interval == 0) {
3169  /* when reconnect delay is not in use, the connection is allowed
3170  * to start again after the remote node is fenced and completely
3171  * stopped. Otherwise, with reconnect delay we wait for the failure
3172  * to be cleared entirely before reconnected can be attempted. */
3173  *on_fail = action_fail_ignore;
3174  rsc->next_role = RSC_ROLE_UNKNOWN;
3175  }
3176  break;
3177  }
3178  }
3179 }
3180 
3181 
/*
 * Process one operation history entry for a resource on a node.
 *
 * Reads the task, return code, call id, status and interval from xml_op,
 * checks the entry for expiry, remaps degraded status results, derives the
 * final status through determine_op_status() and then dispatches on that
 * status to update_resource_state() or unpack_rsc_op_failure().
 *
 * rsc           resource the operation belongs to
 * node          node the operation ran on
 * xml_op        operation history entry
 * last_failure  in/out pointer to the most recent failed entry
 * on_fail       in/out failure policy accumulated for the resource
 * data_set      working set
 *
 * Returns FALSE when a CRM_CHECK on the arguments, the task or the status
 * range fails; TRUE otherwise (including for ignored and expired entries).
 *
 * NOTE(review): the embedded listing numbers skip 3208, 3220, 3258, 3262,
 * 3302, 3338, 3352-3353, 3355 and 3367, so this copy has lost statements,
 * part of an "if" condition and several case labels. Restore the missing
 * lines from the upstream file.
 */
3182 gboolean
3183 unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure,
3184  enum action_fail_response * on_fail, pe_working_set_t * data_set)
3185 {
3186  int task_id = 0;
3187 
3188  const char *key = NULL;
3189  const char *task = NULL;
3190  const char *task_key = NULL;
3191 
3192  int rc = 0;
 /* Starts below the valid range so a missing status attribute fails the
  * range check further down */
3193  int status = PCMK_LRM_OP_PENDING-1;
3194  int target_rc = get_target_rc(xml_op);
3195  int interval = 0;
3196 
3197  gboolean expired = FALSE;
3198  resource_t *parent = rsc;
3199  enum action_fail_response failure_strategy = action_fail_recover;
3200 
3201  CRM_CHECK(rsc != NULL, return FALSE);
3202  CRM_CHECK(node != NULL, return FALSE);
3203  CRM_CHECK(xml_op != NULL, return FALSE);
3204 
3205  task_key = get_op_key(xml_op);
3206 
3207  task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
 /* NOTE(review): listing line 3208 is missing; "key" is never assigned in this
  * copy yet is logged below, so the assignment was presumably here -- confirm
  * upstream */
3209 
3210  crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
3211  crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
3212  crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
3213  crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
3214 
3215  CRM_CHECK(task != NULL, return FALSE);
3216  CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE);
3217  CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE);
3218 
3219  if (safe_str_eq(task, CRMD_ACTION_NOTIFY) ||
 /* NOTE(review): listing line 3220 (the second operand of this condition and
  * the opening brace) is missing -- confirm upstream */
3221  /* safe to ignore these */
3222  return TRUE;
3223  }
3224 
 /* Location constraints below are applied to "parent": the top-level
  * resource unless this resource is flagged unique */
3225  if (is_not_set(rsc->flags, pe_rsc_unique)) {
3226  parent = uber_parent(rsc);
3227  }
3228 
3229  pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)",
3230  task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role));
3231 
3232  if (node->details->unclean) {
3233  pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean."
3234  " Further action depends on the value of the stop's on-fail attribute",
3235  node->details->uname, rsc->id);
3236  }
3237 
3238  if (status == PCMK_LRM_OP_ERROR) {
3239  /* Older versions set this if rc != 0 but it's up to us to decide */
3240  status = PCMK_LRM_OP_DONE;
3241  }
3242 
3243  if(status != PCMK_LRM_OP_NOT_INSTALLED) {
3244  expired = check_operation_expiry(rsc, node, rc, xml_op, data_set);
3245  }
3246 
3247  /* Degraded results are informational only, re-map them to their error-free equivalents */
3248  if (rc == PCMK_OCF_DEGRADED && safe_str_eq(task, CRMD_ACTION_STATUS)) {
3249  rc = PCMK_OCF_OK;
3250 
3251  /* Add them to the failed list to highlight them for the user */
3252  if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
3253  crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED, PCMK_OCF_OK);
3254  record_failed_op(xml_op, node, rsc, data_set);
3255  }
3256 
3257  } else if (rc == PCMK_OCF_DEGRADED_MASTER && safe_str_eq(task, CRMD_ACTION_STATUS)) {
 /* NOTE(review): listing lines 3258 and 3262 are missing; by analogy with the
  * branch above these presumably remap rc and trace the remapping -- confirm
  * upstream */
3259 
3260  /* Add them to the failed list to highlight them for the user */
3261  if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
3263  record_failed_op(xml_op, node, rsc, data_set);
3264  }
3265  }
3266 
3267  if (expired && target_rc != rc) {
3268  const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);
3269 
3270  pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
3271  key, node->details->uname,
3272  services_ocf_exitcode_str(rc), rc,
3273  services_ocf_exitcode_str(target_rc), target_rc);
3274 
 /* Expired non-recurring failures are skipped entirely */
3275  if(interval == 0) {
3276  crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s",
3277  task_key, rc, magic, node->details->uname);
3278  goto done;
3279 
3280  } else if(node->details->online && node->details->unclean == FALSE) {
3281  crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s",
3282  task_key, rc, magic, node->details->uname);
3283  /* This is SO horrible, but we don't have access to CancelXmlOp() yet */
3284  crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout");
3285  goto done;
3286  }
3287  }
3288 
3289  if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) {
3290  status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set);
3291  }
3292 
3293  pe_rsc_trace(rsc, "Handling status: %d", status);
3294  switch (status) {
3295  case PCMK_LRM_OP_CANCELLED:
3296  /* do nothing?? */
3297  pe_err("Don't know what to do for cancelled ops yet");
3298  break;
3299 
3300  case PCMK_LRM_OP_PENDING:
3301  if (safe_str_eq(task, CRMD_ACTION_START)) {
 /* NOTE(review): listing line 3302 is missing here -- confirm upstream */
3303  set_active(rsc);
3304 
3305  } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
3306  rsc->role = RSC_ROLE_MASTER;
3307 
3308  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) {
3309  /* If a pending migrate_to action is out on a unclean node,
3310  * we have to force the stop action on the target. */
3311  const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
3312  node_t *target = pe_find_node(data_set->nodes, migrate_target);
3313  if (target) {
3314  stop_action(rsc, target, FALSE);
3315  }
3316  }
3317 
3318  if (rsc->pending_task == NULL) {
3319  if (safe_str_eq(task, CRMD_ACTION_STATUS) && interval == 0) {
3320  /* Pending probes are not printed, even if pending
3321  * operations are requested. If someone ever requests that
3322  * behavior, uncomment this and the corresponding part of
3323  * native.c:native_pending_task().
3324  */
3325  /*rsc->pending_task = strdup("probe");*/
3326 
3327  } else {
3328  rsc->pending_task = strdup(task);
3329  }
3330  }
3331  break;
3332 
3333  case PCMK_LRM_OP_DONE:
3334  pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname);
3335  update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set);
3336  break;
3337 
 /* NOTE(review): listing line 3338 is missing; presumably the case label for
  * the "not installed" status (see the warning text below) -- confirm upstream */
3339  failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
3340  if (failure_strategy == action_fail_ignore) {
3341  crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: "
3342  "Resource agent doesn't exist",
3343  task_key, status, rc, node->details->uname);
3344  /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */
3345  *on_fail = action_fail_migrate;
3346  }
3347  resource_location(parent, node, -INFINITY, "hard-error", data_set);
3348  unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
3349  break;
3350 
3351  case PCMK_LRM_OP_ERROR:
 /* NOTE(review): listing lines 3352-3353 and 3355 are missing; presumably more
  * case labels in this group, including the hard and fatal error statuses
  * tested below -- confirm upstream */
3354  case PCMK_LRM_OP_TIMEOUT:
3356 
3357  failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
 /* Failures configured to be ignored (and failed stops whose policy is
  * to restart the container) are processed as if they had succeeded */
3358  if ((failure_strategy == action_fail_ignore)
3359  || (failure_strategy == action_fail_restart_container
3360  && safe_str_eq(task, CRMD_ACTION_STOP))) {
3361 
3362  crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded",
3363  task_key, rc, node->details->uname);
3364 
3365  update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set);
3366  crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
 /* NOTE(review): listing line 3367 is missing here -- confirm upstream */
3368 
3369  record_failed_op(xml_op, node, rsc, data_set);
3370 
3371  if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) {
3372  *on_fail = failure_strategy;
3373  }
3374 
3375  } else {
3376  unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
3377 
 /* Hard errors ban the resource from this node, fatal errors from all nodes */
3378  if(status == PCMK_LRM_OP_ERROR_HARD) {
3379  do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE,
3380  "Preventing %s from re-starting on %s: operation %s failed '%s' (%d)",
3381  parent->id, node->details->uname,
3382  task, services_ocf_exitcode_str(rc), rc);
3383 
3384  resource_location(parent, node, -INFINITY, "hard-error", data_set);
3385 
3386  } else if(status == PCMK_LRM_OP_ERROR_FATAL) {
3387  crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)",
3388  parent->id, task, services_ocf_exitcode_str(rc), rc);
3389 
3390  resource_location(parent, NULL, -INFINITY, "fatal-error", data_set);
3391  }
3392  }
3393  break;
3394  }
3395 
3396  done:
3397  pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", rsc->id, task, role2text(rsc->role), role2text(rsc->next_role));
3398  return TRUE;
3399 }
3400 
3401 gboolean
3402 add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set)
3403 {
3404  const char *cluster_name = NULL;
3405 
3406  g_hash_table_insert(node->details->attrs,
3407  strdup(CRM_ATTR_UNAME), strdup(node->details->uname));
3408 
3409  g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID),
3410  strdup(node->details->id));
3411  if (safe_str_eq(node->details->id, data_set->dc_uuid)) {
3412  data_set->dc_node = node;
3413  node->details->is_dc = TRUE;
3414  g_hash_table_insert(node->details->attrs,
3415  strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE));
3416  } else {
3417  g_hash_table_insert(node->details->attrs,
3418  strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE));
3419  }
3420 
3421  cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name");
3422  if (cluster_name) {
3423  g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME),
3424  strdup(cluster_name));
3425  }
3426 
3427  unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL,
3428  node->details->attrs, NULL, overwrite, data_set->now);
3429 
3430  if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) {
3431  const char *site_name = pe_node_attribute_raw(node, "site-name");
3432 
3433  if (site_name) {
3434  g_hash_table_insert(node->details->attrs,
3435  strdup(CRM_ATTR_SITE_NAME),
3436  strdup(site_name));
3437 
3438  } else if (cluster_name) {
3439  /* Default to cluster-name if unset */
3440  g_hash_table_insert(node->details->attrs,
3441  strdup(CRM_ATTR_SITE_NAME),
3442  strdup(cluster_name));
3443  }
3444  }
3445  return TRUE;
3446 }
3447 
3448 static GListPtr
3449 extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
3450 {
3451  int counter = -1;
3452  int stop_index = -1;
3453  int start_index = -1;
3454 
3455  xmlNode *rsc_op = NULL;
3456 
3457  GListPtr gIter = NULL;
3458  GListPtr op_list = NULL;
3459  GListPtr sorted_op_list = NULL;
3460 
3461  /* extract operations */
3462  op_list = NULL;
3463  sorted_op_list = NULL;
3464 
3465  for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
3466  if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
3467  crm_xml_add(rsc_op, "resource", rsc);
3468  crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
3469  op_list = g_list_prepend(op_list, rsc_op);
3470  }
3471  }
3472 
3473  if (op_list == NULL) {
3474  /* if there are no operations, there is nothing to do */
3475  return NULL;
3476  }
3477 
3478  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
3479 
3480  /* create active recurring operations as optional */
3481  if (active_filter == FALSE) {
3482  return sorted_op_list;
3483  }
3484 
3485  op_list = NULL;
3486 
3487  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
3488 
3489  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
3490  xmlNode *rsc_op = (xmlNode *) gIter->data;
3491 
3492  counter++;
3493 
3494  if (start_index < stop_index) {
3495  crm_trace("Skipping %s: not active", ID(rsc_entry));
3496  break;
3497 
3498  } else if (counter < start_index) {
3499  crm_trace("Skipping %s: old", ID(rsc_op));
3500  continue;
3501  }
3502  op_list = g_list_append(op_list, rsc_op);
3503  }
3504 
3505  g_list_free(sorted_op_list);
3506  return op_list;
3507 }
3508 
3509 GListPtr
3510 find_operations(const char *rsc, const char *node, gboolean active_filter,
3511  pe_working_set_t * data_set)
3512 {
3513  GListPtr output = NULL;
3514  GListPtr intermediate = NULL;
3515 
3516  xmlNode *tmp = NULL;
3517  xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE);
3518 
3519  node_t *this_node = NULL;
3520 
3521  xmlNode *node_state = NULL;
3522 
3523  for (node_state = __xml_first_child(status); node_state != NULL;
3524  node_state = __xml_next_element(node_state)) {
3525 
3526  if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
3527  const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
3528 
3529  if (node != NULL && safe_str_neq(uname, node)) {
3530  continue;
3531  }
3532 
3533  this_node = pe_find_node(data_set->nodes, uname);
3534  if(this_node == NULL) {
3535  CRM_LOG_ASSERT(this_node != NULL);
3536  continue;
3537 
3538  } else if (is_remote_node(this_node)) {
3539  determine_remote_online_status(data_set, this_node);
3540 
3541  } else {
3542  determine_online_status(node_state, this_node, data_set);
3543  }
3544 
3545  if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
3546  /* offline nodes run no resources...
3547  * unless stonith is enabled in which case we need to
3548  * make sure rsc start events happen after the stonith
3549  */
3550  xmlNode *lrm_rsc = NULL;
3551 
3552  tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
3553  tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
3554 
3555  for (lrm_rsc = __xml_first_child(tmp); lrm_rsc != NULL;
3556  lrm_rsc = __xml_next_element(lrm_rsc)) {
3557  if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) {
3558 
3559  const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
3560 
3561  if (rsc != NULL && safe_str_neq(rsc_id, rsc)) {
3562  continue;
3563  }
3564 
3565  intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
3566  output = g_list_concat(output, intermediate);
3567  }
3568  }
3569  }
3570  }
3571  }
3572 
3573  return output;
3574 }
GHashTable * tags
Definition: status.h:134
Services API.
gboolean unpack_config(xmlNode *config, pe_working_set_t *data_set)
Definition: unpack.c:176
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:164
GListPtr nodes
Definition: status.h:111
#define XML_RSC_OP_LAST_CHANGE
Definition: msg_xml.h:305
gboolean unpack_rsc_op(resource_t *rsc, node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *failed, pe_working_set_t *data_set)
Definition: unpack.c:3183
void verify_pe_options(GHashTable *options)
Definition: common.c:184
#define STATUS_PATH_MAX
Definition: unpack.c:2422
xmlNode * find_xml_node(xmlNode *cib, const char *node_path, gboolean must_find)
Definition: xml.c:2248
const char * uname
Definition: status.h:143
A dumping ground.
#define crm_notice(fmt, args...)
Definition: logging.h:250
#define CRMD_ACTION_MIGRATED
Definition: crm.h:165
xmlNode * failed
Definition: status.h:119
#define pe_rsc_debug(rsc, fmt, args...)
Definition: internal.h:15
#define pe_flag_have_stonith_resource
Definition: status.h:69
node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t *data_set)
Definition: unpack.c:362
gboolean safe_str_neq(const char *a, const char *b)
Definition: strings.c:150
#define INFINITY
Definition: crm.h:73
gint sort_rsc_priority(gconstpointer a, gconstpointer b)
Definition: utils.c:392
gboolean determine_online_status(xmlNode *node_state, node_t *this_node, pe_working_set_t *data_set)
Definition: unpack.c:1495
#define CRM_ATTR_KIND
Definition: crm.h:90
gboolean get_target_role(resource_t *rsc, enum rsc_role_e *role)
Definition: utils.c:1745
#define XML_NODE_IS_FENCED
Definition: msg_xml.h:274
#define XML_ATTR_TRANSITION_MAGIC
Definition: msg_xml.h:385
node_t * node_copy(const node_t *this_node)
Definition: utils.c:127
#define CRM_ATTR_IS_DC
Definition: crm.h:92
#define stop_action(rsc, node, optional)
Definition: internal.h:197
#define pe_flag_enable_unfencing
Definition: status.h:70
#define pe_rsc_orphan_container_filler
Definition: status.h:191
int default_resource_stickiness
Definition: status.h:102
const char * id
Definition: status.h:142
char * clone_strip(const char *last_rsc_id)
Definition: unpack.c:1615
#define XML_ATTR_QUORUM_PANIC
Definition: msg_xml.h:89
int weight
Definition: status.h:179
#define XML_ATTR_TYPE
Definition: msg_xml.h:105
bool pe_can_fence(pe_working_set_t *data_set, node_t *node)
Definition: utils.c:100
#define XML_TAG_UTILIZATION
Definition: msg_xml.h:193
time_t last_granted
Definition: status.h:396
#define pe_flag_have_remote_nodes
Definition: status.h:83
void(* free)(resource_t *)
Definition: complex.h:51
bool container_fix_remote_addr(resource_t *rsc)
Definition: container.c:757
#define XML_RULE_ATTR_SCORE
Definition: msg_xml.h:321
#define XML_BOOLEAN_FALSE
Definition: msg_xml.h:118
#define crm_config_err(fmt...)
Definition: crm_internal.h:256
int get_target_rc(xmlNode *xml_op)
Definition: unpack.c:3064
action_t * pe_fence_op(node_t *node, const char *op, bool optional, const char *reason, pe_working_set_t *data_set)
Definition: utils.c:2195
enum action_fail_response on_fail
Definition: status.h:349
#define pe_rsc_orphan
Definition: status.h:188
int char2score(const char *score)
Definition: utils.c:230
#define pe_proc_warn(fmt...)
Definition: internal.h:21
#define XML_TAG_TRANSIENT_NODEATTRS
Definition: msg_xml.h:391
#define CRMD_ACTION_NOTIFY
Definition: crm.h:178
#define pe_flag_startup_probes
Definition: status.h:81
long long crm_get_msec(const char *input)
Definition: utils.c:598
GListPtr running_rsc
Definition: status.h:157
GListPtr find_actions(GListPtr input, const char *key, const node_t *on_node)
Definition: utils.c:1470
gboolean common_unpack(xmlNode *xml_obj, resource_t **rsc, resource_t *parent, pe_working_set_t *data_set)
Definition: complex.c:454
enum pe_obj_types variant
Definition: status.h:269
#define XML_LRM_ATTR_INTERVAL
Definition: msg_xml.h:284
#define XML_CIB_TAG_TAG
Definition: msg_xml.h:418
#define XML_LRM_TAG_RESOURCE
Definition: msg_xml.h:250
#define pe_flag_stop_rsc_orphans
Definition: status.h:73
gboolean pending
Definition: status.h:149
node_t * partial_migration_source
Definition: status.h:309
#define CRMD_ACTION_PROMOTE
Definition: crm.h:173
int crm_parse_int(const char *text, const char *default_text)
Definition: strings.c:125
gboolean fixed
Definition: status.h:180
GListPtr resources
Definition: status.h:112
#define XML_NVPAIR_ATTR_NAME
Definition: msg_xml.h:367
#define XML_NODE_IS_MAINTENANCE
Definition: msg_xml.h:275
gint sort_op_by_callid(gconstpointer a, gconstpointer b)
Definition: utils.c:1615
#define XML_NODE_EXPECTED
Definition: msg_xml.h:270
node_t * pe_find_node(GListPtr node_list, const char *uname)
Definition: status.c:301
#define XML_CIB_TAG_RSC_TEMPLATE
Definition: msg_xml.h:202
AIS_Host host
Definition: internal.h:52
resource_t * create_child_clone(resource_t *rsc, int sub_id, pe_working_set_t *data_set)
Definition: clone.c:73
time_t get_effective_time(pe_working_set_t *data_set)
Definition: utils.c:1730
no_quorum_policy_t no_quorum_policy
Definition: status.h:103
#define CRM_LOG_ASSERT(expr)
Definition: logging.h:150
const char * pe_pref(GHashTable *options, const char *name)
Definition: common.c:190
char * clone_name
Definition: status.h:262
xmlNode * params_restart
Definition: internal.h:296
resource_t * uber_parent(resource_t *rsc)
Definition: complex.c:894
resource_t * remote_rsc
Definition: status.h:160
#define clear_bit(word, bit)
Definition: crm_internal.h:191
void copy_in_properties(xmlNode *target, xmlNode *src)
Definition: xml.c:2320
#define CRMD_JOINSTATE_NACK
Definition: crm.h:158
#define XML_CIB_TAG_LRM
Definition: msg_xml.h:248
#define CRM_ATTR_CLUSTER_NAME
Definition: crm.h:93
GHashTable * tickets
Definition: status.h:106
node_t * dc_node
Definition: status.h:95
enum rsc_role_e role
Definition: status.h:298
#define pe_rsc_allow_migrate
Definition: status.h:214
GListPtr children
Definition: status.h:305
#define XPATH_ENABLE_UNFENCING
Definition: unpack.c:154
xmlNode * get_xpath_object(const char *xpath, xmlNode *xml_obj, int error_level)
Definition: xpath.c:224
#define pe_proc_err(fmt...)
Definition: internal.h:20
action_fail_response
Definition: common.h:29
char * strndup(const char *str, size_t len)
char * dc_uuid
Definition: status.h:94
gboolean is_remote_node
Definition: status.h:283
int stonith_timeout
Definition: status.h:101
gboolean standby
Definition: status.h:147
#define XML_CIB_TAG_PROPSET
Definition: msg_xml.h:184
char * id
Definition: status.h:261
gboolean decode_transition_key(const char *key, char **uuid, int *action, int *transition_id, int *target_rc)
Definition: operations.c:233
resource_t * find_container_child(const resource_t *bundle, const node_t *node)
Definition: container.c:1179
#define XML_LRM_ATTR_RSCID
Definition: msg_xml.h:294
gboolean unpack_resources(xmlNode *xml_resources, pe_working_set_t *data_set)
Definition: unpack.c:743
#define CRMD_ACTION_START
Definition: crm.h:167
#define XML_LRM_ATTR_TASK_KEY
Definition: msg_xml.h:286
#define pe_rsc_block
Definition: status.h:190
#define XML_TAG_ATTR_SETS
Definition: msg_xml.h:185
GHashTable * utilization
Definition: status.h:165
#define XML_LRM_ATTR_TASK
Definition: msg_xml.h:285
const char * pe_base_name_end(const char *id)
Definition: unpack.c:1575
const char * role2text(enum rsc_role_e role)
Definition: common.c:346
char uname[MAX_NAME]
Definition: internal.h:53
gboolean is_remote_node(node_t *node)
Definition: remote.c:62
#define CRMD_ACTION_STOP
Definition: crm.h:170
#define CRM_OP_CLEAR_FAILCOUNT
Definition: crm.h:129
struct node_shared_s * details
Definition: status.h:182
gboolean unpack_status(xmlNode *status, pe_working_set_t *data_set)
Definition: unpack.c:1163
#define CRMD_JOINSTATE_DOWN
Definition: crm.h:155
#define crm_warn(fmt, args...)
Definition: logging.h:249
#define CRMD_ACTION_DEMOTE
Definition: crm.h:175
#define set_bit(word, bit)
Definition: crm_internal.h:190
#define crm_atoi(text, default_text)
Definition: util.h:110
gboolean unclean
Definition: status.h:150
xmlNode * pe_create_remote_xml(xmlNode *parent, const char *uname, const char *container_id, const char *migrateable, const char *is_managed, const char *interval, const char *monitor_timeout, const char *start_timeout, const char *server, const char *port)
Definition: remote.c:158
uint32_t id
Definition: internal.h:48
#define crm_debug(fmt, args...)
Definition: logging.h:253
void native_add_running(resource_t *rsc, node_t *node, pe_working_set_t *data_set)
Definition: native.c:36
#define XML_CIB_ATTR_SHUTDOWN
Definition: msg_xml.h:277
#define XML_RSC_ATTR_CONTAINER
Definition: msg_xml.h:230
Utility functions.
#define XML_ATTR_ID
Definition: msg_xml.h:102
char * pending_task
Definition: status.h:314
#define XML_CIB_TAG_RESOURCE
Definition: msg_xml.h:196
gboolean unpack_nodes(xmlNode *xml_nodes, pe_working_set_t *data_set)
Definition: unpack.c:531
#define XML_BOOLEAN_TRUE
Definition: msg_xml.h:117
#define XML_CIB_TAG_STATE
Definition: msg_xml.h:178
#define pe_rsc_failed
Definition: status.h:206
char * digest_all_calc
Definition: internal.h:297
#define stop_key(rsc)
Definition: internal.h:196
node_t * partial_migration_target
Definition: status.h:308
#define pe_flag_startup_fencing
Definition: status.h:79
resource_object_functions_t * fns
Definition: status.h:270
resource_t * container
Definition: status.h:311
#define CRM_ATTR_UNAME
Definition: crm.h:88
GHashTable * allowed_nodes
Definition: status.h:296
GHashTable * digest_cache
Definition: status.h:168
#define set_config_flag(data_set, option, flag)
Definition: unpack.c:34
#define XML_NODE_IS_PEER
Definition: msg_xml.h:272
#define crm_trace(fmt, args...)
Definition: logging.h:254
#define CRMD_JOINSTATE_MEMBER
Definition: crm.h:157
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:129
enum rsc_digest_cmp_val rc
Definition: internal.h:293
gboolean is_baremetal_remote_node(node_t *node)
Definition: remote.c:44
#define pe_rsc_is_container
Definition: status.h:219
char * digest_secure_calc
Definition: internal.h:298
gboolean unpack_remote_nodes(xmlNode *xml_resources, pe_working_set_t *data_set)
Definition: unpack.c:617
gboolean add_node_attrs(xmlNode *xml_obj, node_t *node, gboolean overwrite, pe_working_set_t *data_set)
Definition: unpack.c:3402
GHashTable * meta
Definition: status.h:359
gboolean is_container_remote_node(node_t *node)
Definition: remote.c:53
xmlNode * add_node_copy(xmlNode *new_parent, xmlNode *xml_node)
Definition: xml.c:2438
GListPtr refs
Definition: status.h:403
gboolean unpacked
Definition: status.h:175
const char * stonith_action
Definition: status.h:96
#define crm_log_xml_debug(xml, text)
Definition: logging.h:261
#define XML_TAG_META_SETS
Definition: msg_xml.h:186
Wrappers for and extensions to libxml2.
#define XML_ATTR_TE_NOWAIT
Definition: msg_xml.h:388
GHashTable * config_hash
Definition: status.h:105
#define XML_ATTR_UNAME
Definition: msg_xml.h:130
gboolean add_tag_ref(GHashTable *tags, const char *tag_name, const char *obj_ref)
Definition: utils.c:2310
#define XML_BOOLEAN_YES
Definition: msg_xml.h:119
#define XML_RSC_ATTR_MANAGED
Definition: msg_xml.h:219
xmlNode * create_xml_node(xmlNode *parent, const char *name)
Definition: xml.c:2621
gboolean is_dc
Definition: status.h:154
int crm_element_value_int(xmlNode *data, const char *name, int *dest)
Definition: xml.c:3877
char * clone_zero(const char *last_rsc_id)
Definition: unpack.c:1637
const char * crm_element_value(xmlNode *data, const char *name)
Definition: xml.c:5224
action_t * custom_action(resource_t *rsc, char *key, const char *task, node_t *on_node, gboolean optional, gboolean foo, pe_working_set_t *data_set)
Definition: utils.c:419
unsigned long long flags
Definition: status.h:285
const char * pe_node_attribute_raw(pe_node_t *node, const char *name)
Definition: common.c:478
#define pe_flag_maintenance_mode
Definition: status.h:66
resource_t * parent
Definition: status.h:267
node_t *(* location)(resource_t *, GListPtr *, gboolean)
Definition: complex.h:50
#define XML_LRM_ATTR_MIGRATE_TARGET
Definition: msg_xml.h:311
#define CIB_OPTIONS_FIRST
Definition: msg_xml.h:53
#define XML_RSC_ATTR_REMOTE_NODE
Definition: msg_xml.h:233
char * uuid
Definition: status.h:344
#define XML_LRM_ATTR_RESTART_DIGEST
Definition: msg_xml.h:301
GListPtr dangling_migrations
Definition: status.h:306
void free_xml(xmlNode *child)
Definition: xml.c:2739
#define pe_flag_stop_everything
Definition: status.h:75
xmlNode * input
Definition: status.h:90
gboolean crm_str_eq(const char *a, const char *b, gboolean use_case)
Definition: strings.c:213
#define XML_CIB_TAG_NODE
Definition: msg_xml.h:179
GListPtr fillers
Definition: status.h:312
const char * placement_strategy
Definition: status.h:97
gboolean unseen
Definition: status.h:151
int failure_timeout
Definition: status.h:279
xmlNode * params_all
Definition: internal.h:294
uint32_t counter
Definition: internal.h:50
int remote_reconnect_interval
Definition: status.h:318
gboolean remote_maintenance
Definition: status.h:174
#define crm_config_warn(fmt...)
Definition: crm_internal.h:257
GListPtr actions
Definition: status.h:290
#define XML_ATTR_TRANSITION_KEY
Definition: msg_xml.h:386
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Definition: xml.c:2523
#define CRM_XS
Definition: logging.h:42
gboolean maintenance
Definition: status.h:170
#define pe_rsc_unique
Definition: status.h:194
GHashTable * node_hash_from_list(GListPtr list)
Definition: utils.c:183
const char * localhost
Definition: status.h:133
GHashTable * meta
Definition: status.h:301
gboolean xml_contains_remote_node(xmlNode *xml)
Definition: remote.c:91
node_t * pe_find_node_any(GListPtr node_list, const char *id, const char *uname)
Definition: status.c:273
const char * fail2text(enum action_fail_response fail)
Definition: common.c:196
void add_hash_param(GHashTable *hash, const char *name, const char *value)
Definition: common.c:423
#define pe_flag_quick_location
Definition: status.h:85
#define pe_rsc_start_pending
Definition: status.h:209
#define XML_LRM_TAG_RESOURCES
Definition: msg_xml.h:249
gboolean standby_onfail
Definition: status.h:148
#define crm_err(fmt, args...)
Definition: logging.h:248
resource_t *(* find_rsc)(resource_t *parent, const char *search, node_t *node, int flags)
Definition: complex.h:44
#define XML_CIB_TAG_TICKET_STATE
Definition: msg_xml.h:415
void resource_location(resource_t *rsc, node_t *node, int score, const char *tag, pe_working_set_t *data_set)
Definition: utils.c:1571
xmlXPathObjectPtr xpath_search(xmlNode *xml_top, const char *path)
Definition: xpath.c:145
void pe_fence_node(pe_working_set_t *data_set, node_t *node, const char *reason)
Schedule a fence action for a node.
Definition: unpack.c:77
ticket_t * ticket_new(const char *ticket_id, pe_working_set_t *data_set)
Definition: utils.c:1862
GHashTable * attrs
Definition: status.h:162
bool remote_id_conflict(const char *remote_name, pe_working_set_t *data)
Definition: unpack.c:424
enum rsc_role_e next_role
Definition: status.h:299
gboolean online
Definition: status.h:146
#define XML_ATTR_HAVE_WATCHDOG
Definition: msg_xml.h:91
#define XML_NODE_ATTR_RSC_DISCOVERY
Definition: msg_xml.h:371
gboolean shutdown
Definition: status.h:152
int compare_version(const char *version1, const char *version2)
Definition: utils.c:486
gboolean rsc_discovery_enabled
Definition: status.h:171
#define pe_flag_remove_after_stop
Definition: status.h:78
#define CRMD_ACTION_METADATA
Definition: crm.h:182
#define pe_rsc_failure_ignored
Definition: status.h:216
xmlNode * params_secure
Definition: internal.h:295
#define XML_LRM_ATTR_CALLID
Definition: msg_xml.h:297
#define pe_rsc_managed
Definition: status.h:189
#define CRMD_ACTION_MIGRATE
Definition: crm.h:164
#define XML_NVPAIR_ATTR_VALUE
Definition: msg_xml.h:368
int node_score_red
Definition: utils.c:72
#define crm_str_hash
Definition: util.h:73
#define uint32_t
Definition: stdint.in.h:158
enum rsc_role_e fail_role
Definition: status.h:350
gboolean remote_requires_reset
Definition: status.h:172
char * id
Definition: status.h:402
#define CRM_ASSERT(expr)
Definition: error.h:35
char data[0]
Definition: internal.h:58
#define crm_str(x)
Definition: logging.h:274
#define XML_ATTR_CRM_VERSION
Definition: msg_xml.h:84
#define XML_LRM_ATTR_OPSTATUS
Definition: msg_xml.h:295
gboolean unpack_lrm_resources(node_t *node, xmlNode *lrm_rsc_list, pe_working_set_t *data_set)
Definition: unpack.c:2369
int pe_get_failcount(node_t *node, resource_t *rsc, time_t *last_failure, uint32_t flags, xmlNode *xml_op, pe_working_set_t *data_set)
Definition: failcounts.c:237
#define CRMD_JOINSTATE_PENDING
Definition: crm.h:156
enum node_type type
Definition: status.h:163
uint32_t pe_wo
Definition: unpack.c:50
rsc_role_e
Definition: common.h:81
enum pe_action_flags flags
Definition: status.h:347
GHashTable * known_on
Definition: status.h:295
#define XML_LRM_ATTR_RC
Definition: msg_xml.h:296
gboolean standby
Definition: status.h:397
tag_s
Definition: status.h:401
GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t *data_set)
Definition: unpack.c:3510
#define XML_NODE_JOIN_STATE
Definition: msg_xml.h:269
gboolean expected_up
Definition: status.h:153
void pe_free_action(action_t *action)
Definition: utils.c:1355
#define pe_flag_have_quorum
Definition: status.h:63
void destroy_ticket(gpointer data)
Definition: utils.c:1850
#define XML_CIB_TAG_STATUS
Definition: msg_xml.h:159
#define XML_CIB_TAG_OBJ_REF
Definition: msg_xml.h:419
void unpack_instance_attributes(xmlNode *top, xmlNode *xml_obj, const char *set_name, GHashTable *node_hash, GHashTable *hash, const char *always_first, gboolean overwrite, crm_time_t *now)
Definition: rules.c:919
#define pe_flag_is_managed_default
Definition: status.h:65
gboolean granted
Definition: status.h:395
node_s
Definition: status.h:178
gboolean remote_was_fenced
Definition: status.h:173
#define XML_NODE_IN_CLUSTER
Definition: msg_xml.h:271
#define pe_flag_stop_action_orphans
Definition: status.h:74
#define NORMALNODE
Definition: util.h:42
gboolean crm_is_true(const char *s)
Definition: strings.c:165
#define CRM_ATTR_SITE_NAME
Definition: crm.h:94
void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
Definition: unpack.c:2187
#define XML_CIB_TAG_GROUP
Definition: msg_xml.h:197
CRM_TRACE_INIT_DATA(pe_status)
#define XML_LRM_TAG_RSC_OP
Definition: msg_xml.h:253
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:16
#define pe_flag_symmetric_cluster
Definition: status.h:64
#define ID(x)
Definition: msg_xml.h:447
unsigned long long flags
Definition: status.h:99
char * generate_op_key(const char *rsc_id, const char *op_type, int interval)
Generate an operation key.
Definition: operations.c:37
#define pe_err(fmt...)
Definition: internal.h:18
void print_resource(int log_level, const char *pre_text, resource_t *rsc, gboolean details)
Definition: utils.c:1339
gboolean unpack_tags(xmlNode *xml_tags, pe_working_set_t *data_set)
Definition: unpack.c:803
resource_t * pe_find_resource(GListPtr rsc_list, const char *id_rh)
Definition: status.c:249
#define safe_str_eq(a, b)
Definition: util.h:72
int node_score_green
Definition: utils.c:73
#define ONLINESTATUS
Definition: util.h:52
gboolean order_actions(action_t *lh_action, action_t *rh_action, enum pe_ordering order)
Definition: utils.c:1781
char * id
Definition: status.h:394
op_digest_cache_t * rsc_action_digest_cmp(resource_t *rsc, xmlNode *xml_op, node_t *node, pe_working_set_t *data_set)
Definition: utils.c:2035
char * crm_strdup_printf(char const *format,...) __attribute__((__format__(__printf__, 1, 2)))
#define XML_LRM_ATTR_MIGRATE_SOURCE
Definition: msg_xml.h:310
#define LOG_DEBUG_3
Definition: logging.h:32
void freeXpathObject(xmlXPathObjectPtr xpathObj)
Definition: xpath.c:45
#define CRM_ATTR_ID
Definition: crm.h:89
gint sort_node_uname(gconstpointer a, gconstpointer b)
Definition: utils.c:225
GList * GListPtr
Definition: crm.h:210
int node_score_yellow
Definition: utils.c:74
#define XML_CIB_TAG_TICKETS
Definition: msg_xml.h:414
crm_time_t * now
Definition: status.h:91
#define crm_info(fmt, args...)
Definition: logging.h:251
char * digest_restart_calc
Definition: internal.h:299
void g_hash_destroy_str(gpointer data)
Definition: strings.c:74
GHashTable * template_rsc_sets
Definition: status.h:132
#define pe_flag_concurrent_fencing
Definition: status.h:71
GHashTable * state
Definition: status.h:398
#define pe_flag_start_failure_fatal
Definition: status.h:77
#define pe_flag_stonith_enabled
Definition: status.h:68
enum crm_ais_msg_types type
Definition: internal.h:51
#define pe_warn_once(pe_wo_bit, fmt...)
Definition: unpack.h:116
#define pe_rsc_info(rsc, fmt, args...)
Definition: internal.h:14
#define CRMD_ACTION_STATUS
Definition: crm.h:181
GListPtr running_on
Definition: status.h:294