pacemaker 2.1.8-2.1.8~rc1
Scalable High-Availability cluster resource manager
Loading...
Searching...
No Matches
unpack.c
Go to the documentation of this file.
1/*
2 * Copyright 2004-2024 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU Lesser General Public License
7 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 */
9
10#include <crm_internal.h>
11
12#include <stdio.h>
13#include <string.h>
14#include <glib.h>
15#include <time.h>
16
17#include <crm/crm.h>
18#include <crm/services.h>
19#include <crm/common/xml.h>
21
22#include <crm/common/util.h>
23#include <crm/pengine/rules.h>
25#include <pe_status_private.h>
26
28
29// A (parsed) resource action history entry
30struct action_history {
31 pcmk_resource_t *rsc; // Resource that history is for
32 pcmk_node_t *node; // Node that history is for
33 xmlNode *xml; // History entry XML
34
35 // Parsed from entry XML
36 const char *id; // XML ID of history entry
37 const char *key; // Operation key of action
38 const char *task; // Action name
39 const char *exit_reason; // Exit reason given for result
40 guint interval_ms; // Action interval
41 int call_id; // Call ID of action
42 int expected_exit_status; // Expected exit status of action
43 int exit_status; // Actual exit status of action
44 int execution_status; // Execution status of action
45};
46
/*!
 * \internal
 * \brief Set or clear a scheduler flag according to a boolean cluster option
 *
 * Look up \p option in the scheduler's configuration table. If it has a value,
 * set \p flag in the scheduler's flags when the value is true, and clear it
 * otherwise. If the option has no value, the flags are left untouched.
 *
 * \param[in,out] scheduler  Scheduler data (evaluated more than once)
 * \param[in]     option     Name of cluster option to check
 * \param[in]     flag       Flag to set or clear (also stringified for logs)
 *
 * \note This calls pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather
 *       than pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that
 *       the flag is stringified more readably in log messages.
 */
#define set_config_flag(scheduler, option, flag) do {                         \
        const char *scf_value = NULL;                                         \
                                                                              \
        scf_value = pcmk__cluster_option((scheduler)->config_hash, (option)); \
        if ((scf_value != NULL) && crm_is_true(scf_value)) {                  \
            (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__,       \
                                    LOG_TRACE, "Scheduler",                   \
                                    crm_system_name, (scheduler)->flags,      \
                                    (flag), #flag);                           \
                                                                              \
        } else if (scf_value != NULL) {                                       \
            (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__,     \
                                    LOG_TRACE, "Scheduler",                   \
                                    crm_system_name, (scheduler)->flags,      \
                                    (flag), #flag);                           \
        }                                                                     \
    } while (0)
69
70static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
71 xmlNode *xml_op, xmlNode **last_failure,
72 enum action_fail_response *failed);
73static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
74 pcmk_node_t *this_node);
75static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
76 bool overwrite, pcmk_scheduler_t *scheduler);
77static void determine_online_status(const xmlNode *node_state,
78 pcmk_node_t *this_node,
80
81static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
83
84
85static gboolean
86is_dangling_guest_node(pcmk_node_t *node)
87{
88 /* we are looking for a remote-node that was supposed to be mapped to a
89 * container resource, but all traces of that container have disappeared
90 * from both the config and the status section. */
91 if (pcmk__is_pacemaker_remote_node(node)
92 && (node->details->remote_rsc != NULL)
93 && (node->details->remote_rsc->container == NULL)
96 return TRUE;
97 }
98
99 return FALSE;
100}
101
111void
113 const char *reason, bool priority_delay)
114{
115 CRM_CHECK(node, return);
116
117 /* A guest node is fenced by marking its container as failed */
118 if (pcmk__is_guest_or_bundle_node(node)) {
120
121 if (!pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
122 if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
123 crm_notice("Not fencing guest node %s "
124 "(otherwise would because %s): "
125 "its guest resource %s is unmanaged",
126 pcmk__node_name(node), reason, rsc->id);
127 } else {
128 pcmk__sched_warn("Guest node %s will be fenced "
129 "(by recovering its guest resource %s): %s",
130 pcmk__node_name(node), rsc->id, reason);
131
132 /* We don't mark the node as unclean because that would prevent the
133 * node from running resources. We want to allow it to run resources
134 * in this transition if the recovery succeeds.
135 */
136 node->details->remote_requires_reset = TRUE;
139 }
140 }
141
142 } else if (is_dangling_guest_node(node)) {
143 crm_info("Cleaning up dangling connection for guest node %s: "
144 "fencing was already done because %s, "
145 "and guest resource no longer exists",
146 pcmk__node_name(node), reason);
149
150 } else if (pcmk__is_remote_node(node)) {
151 pcmk_resource_t *rsc = node->details->remote_rsc;
152
153 if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
154 crm_notice("Not fencing remote node %s "
155 "(otherwise would because %s): connection is unmanaged",
156 pcmk__node_name(node), reason);
157 } else if(node->details->remote_requires_reset == FALSE) {
158 node->details->remote_requires_reset = TRUE;
159 pcmk__sched_warn("Remote node %s %s: %s",
160 pcmk__node_name(node),
161 pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
162 reason);
163 }
164 node->details->unclean = TRUE;
165 // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
166 pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
167
168 } else if (node->details->unclean) {
169 crm_trace("Cluster node %s %s because %s",
170 pcmk__node_name(node),
171 pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean",
172 reason);
173
174 } else {
175 pcmk__sched_warn("Cluster node %s %s: %s",
176 pcmk__node_name(node),
177 pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
178 reason);
179 node->details->unclean = TRUE;
180 pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
181 }
182}
183
// @TODO xpaths can't handle templates, rules, or id-refs

/* XPath step matching an nvpair whose name is the "provides" or "requires"
 * option and whose value is the unfencing value.
 *
 * NOTE: the expanded string has no whitespace between the first quoted name
 * and "or"; the literal pieces are kept exactly as they are so the resulting
 * expression is unchanged.
 */
#define XPATH_UNFENCING_NVPAIR                                          \
    PCMK_XE_NVPAIR                                                      \
    "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'"                   \
    "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') "                   \
    "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']"

// Unfencing nvpair in meta-attributes anywhere below the resources section
#define XPATH_UNFENCING_IN_RESOURCES                                    \
    "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES     \
    "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR

// Unfencing nvpair in meta-attributes directly below rsc_defaults
#define XPATH_UNFENCING_IN_RSC_DEFAULTS                                 \
    "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS  \
    "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR

// Unfencing in rsc_defaults or any resource (union of the two paths above)
#define XPATH_ENABLE_UNFENCING                                          \
    XPATH_UNFENCING_IN_RESOURCES "|" XPATH_UNFENCING_IN_RSC_DEFAULTS
198
199static void
200set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
201{
202 xmlXPathObjectPtr result = NULL;
203
204 if (!pcmk_is_set(scheduler->flags, flag)) {
206 if (result && (numXpathResults(result) > 0)) {
208 }
210 }
211}
212
213gboolean
215{
216 const char *value = NULL;
217 guint interval_ms = 0U;
218 GHashTable *config_hash = pcmk__strkey_table(free, free);
219
220 pe_rule_eval_data_t rule_data = {
221 .node_hash = NULL,
222 .now = scheduler->now,
223 .match_data = NULL,
224 .rsc_data = NULL,
225 .op_data = NULL
226 };
227
228 scheduler->config_hash = config_hash;
229
232 FALSE, scheduler);
233
235
239 crm_info("Startup probes: disabled (dangerous)");
240 }
241
242 value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG);
243 if (value && crm_is_true(value)) {
244 crm_info("Watchdog-based self-fencing will be performed via SBD if "
245 "fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
246 " is nonzero");
248 }
249
250 /* Set certain flags via xpath here, so they can be used before the relevant
251 * configuration sections are unpacked.
252 */
254 scheduler);
255
257 pcmk_parse_interval_spec(value, &interval_ms);
258
259 if (interval_ms >= INT_MAX) {
260 scheduler->stonith_timeout = INT_MAX;
261 } else {
262 scheduler->stonith_timeout = (int) interval_ms;
263 }
264 crm_debug("STONITH timeout: %d", scheduler->stonith_timeout);
265
269 crm_debug("STONITH of failed nodes is enabled");
270 } else {
271 crm_debug("STONITH of failed nodes is disabled");
272 }
273
278 "Support for " PCMK_OPT_STONITH_ACTION " of "
279 "'" PCMK__ACTION_POWEROFF "' is deprecated and will be "
280 "removed in a future release "
281 "(use '" PCMK_ACTION_OFF "' instead)");
283 }
284 crm_trace("STONITH will %s nodes", scheduler->stonith_action);
285
289 crm_debug("Concurrent fencing is enabled");
290 } else {
291 crm_debug("Concurrent fencing is disabled");
292 }
293
295 if (value) {
296 pcmk_parse_interval_spec(value, &interval_ms);
297 scheduler->priority_fencing_delay = (int) (interval_ms / 1000);
298 crm_trace("Priority fencing delay is %ds",
300 }
301
304 crm_debug("Stop all active resources: %s",
305 pcmk__flag_text(scheduler->flags, pcmk_sched_stop_all));
306
310 crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
311 }
312
314
315 if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) {
317
318 } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) {
320
321 } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) {
323
324 } else if (pcmk__str_eq(value, PCMK_VALUE_FENCE_LEGACY, pcmk__str_casei)) {
326 int do_panic = 0;
327
329 &do_panic);
330 if (do_panic || pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) {
332 } else {
334 " to 'stop': cluster has never had quorum");
336 }
337 } else {
339 " to 'stop' because fencing is disabled");
341 }
342
343 } else {
345 }
346
347 switch (scheduler->no_quorum_policy) {
349 crm_debug("On loss of quorum: Freeze resources");
350 break;
352 crm_debug("On loss of quorum: Stop ALL resources");
353 break;
355 crm_debug("On loss of quorum: "
356 "Demote promotable resources and stop other resources");
357 break;
359 crm_notice("On loss of quorum: Fence all remaining nodes");
360 break;
362 crm_notice("On loss of quorum: Ignore");
363 break;
364 }
365
369 crm_trace("Orphan resources are stopped");
370 } else {
371 crm_trace("Orphan resources are ignored");
372 }
373
377 crm_trace("Orphan resource actions are stopped");
378 } else {
379 crm_trace("Orphan resource actions are ignored");
380 }
381
383 if (value != NULL) {
384 if (crm_is_true(value)) {
387 "Support for the " PCMK__OPT_REMOVE_AFTER_STOP
388 " cluster property is deprecated and will be "
389 "removed in a future release");
390 } else {
393 }
394 }
395
398 crm_trace("Maintenance mode: %s",
399 pcmk__flag_text(scheduler->flags, pcmk_sched_in_maintenance));
400
404 crm_trace("Start failures are always fatal");
405 } else {
406 crm_trace("Start failures are handled by failcount");
407 }
408
412 }
414 crm_trace("Unseen nodes will be fenced");
415 } else {
417 "Blind faith: not fencing unseen nodes");
418 }
419
421
424 crm_trace("Placement strategy: %s", scheduler->placement_strategy);
425
431 scheduler->shutdown_lock /= 1000;
432 crm_trace("Resources will be locked to nodes that were cleanly "
433 "shut down (locks expire after %s)",
435 } else {
436 crm_trace("Resources will not be locked to nodes that were cleanly "
437 "shut down");
438 }
439
444 crm_trace("Do not fence pending nodes");
445 } else {
446 crm_trace("Fence pending nodes after %s",
448 * 1000));
449 }
450
451 return TRUE;
452}
453
455pe_create_node(const char *id, const char *uname, const char *type,
456 const char *score, pcmk_scheduler_t *scheduler)
457{
458 pcmk_node_t *new_node = NULL;
459
460 if (pcmk_find_node(scheduler, uname) != NULL) {
461 pcmk__config_warn("More than one node entry has name '%s'", uname);
462 }
463
464 new_node = calloc(1, sizeof(pcmk_node_t));
465 if (new_node == NULL) {
466 pcmk__sched_err("Could not allocate memory for node %s", uname);
467 return NULL;
468 }
469
470 new_node->weight = char2score(score);
471 new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
472
473 if (new_node->details == NULL) {
474 free(new_node);
475 pcmk__sched_err("Could not allocate memory for node %s", uname);
476 return NULL;
477 }
478
479 crm_trace("Creating node for entry %s/%s", uname, id);
480 new_node->details->id = id;
481 new_node->details->uname = uname;
482 new_node->details->online = FALSE;
483 new_node->details->shutdown = FALSE;
484 new_node->details->rsc_discovery_enabled = TRUE;
485 new_node->details->running_rsc = NULL;
486 new_node->details->data_set = scheduler;
487
488 if (pcmk__str_eq(type, PCMK_VALUE_MEMBER,
491
492 } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) {
495
496 } else {
497 /* @COMPAT 'ping' is the default for backward compatibility, but it
498 * should be changed to 'member' at a compatibility break
499 */
500 if (!pcmk__str_eq(type, PCMK__VALUE_PING, pcmk__str_casei)) {
501 pcmk__config_warn("Node %s has unrecognized type '%s', "
502 "assuming '" PCMK__VALUE_PING "'",
503 pcmk__s(uname, "without name"), type);
504 }
506 "Support for nodes of type '" PCMK__VALUE_PING "' "
507 "(such as %s) is deprecated and will be removed in a "
508 "future release",
509 pcmk__s(uname, "unnamed node"));
510 new_node->details->type = node_ping;
511 }
512
513 new_node->details->attrs = pcmk__strkey_table(free, free);
514
515 if (pcmk__is_pacemaker_remote_node(new_node)) {
516 pcmk__insert_dup(new_node->details->attrs, CRM_ATTR_KIND, "remote");
517 } else {
518 pcmk__insert_dup(new_node->details->attrs, CRM_ATTR_KIND, "cluster");
519 }
520
521 new_node->details->utilization = pcmk__strkey_table(free, free);
522 new_node->details->digest_cache = pcmk__strkey_table(free,
524
525 scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
527 return new_node;
528}
529
530static const char *
531expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
532{
533 xmlNode *attr_set = NULL;
534 xmlNode *attr = NULL;
535
536 const char *container_id = pcmk__xe_id(xml_obj);
537 const char *remote_name = NULL;
538 const char *remote_server = NULL;
539 const char *remote_port = NULL;
540 const char *connect_timeout = "60s";
541 const char *remote_allow_migrate=NULL;
542 const char *is_managed = NULL;
543
544 for (attr_set = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
545 attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) {
546
547 if (!pcmk__xe_is(attr_set, PCMK_XE_META_ATTRIBUTES)) {
548 continue;
549 }
550
551 for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL);
552 attr != NULL; attr = pcmk__xe_next(attr)) {
553
554 const char *value = crm_element_value(attr, PCMK_XA_VALUE);
555 const char *name = crm_element_value(attr, PCMK_XA_NAME);
556
557 if (name == NULL) { // Sanity
558 continue;
559 }
560
561 if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) {
562 remote_name = value;
563
564 } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) {
565 remote_server = value;
566
567 } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) {
568 remote_port = value;
569
570 } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) {
571 connect_timeout = value;
572
573 } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) {
574 remote_allow_migrate = value;
575
576 } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) {
577 is_managed = value;
578 }
579 }
580 }
581
582 if (remote_name == NULL) {
583 return NULL;
584 }
585
586 if (pe_find_resource(data->resources, remote_name) != NULL) {
587 return NULL;
588 }
589
590 pe_create_remote_xml(parent, remote_name, container_id,
591 remote_allow_migrate, is_managed,
592 connect_timeout, remote_server, remote_port);
593 return remote_name;
594}
595
596static void
597handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
598{
599 if ((new_node->details->type == pcmk_node_variant_remote)
600 && (new_node->details->remote_rsc == NULL)) {
601 /* Ignore fencing for remote nodes that don't have a connection resource
602 * associated with them. This happens when remote node entries get left
603 * in the nodes section after the connection resource is removed.
604 */
605 return;
606 }
607
609 // All nodes are unclean until we've seen their status entry
610 new_node->details->unclean = TRUE;
611
612 } else {
613 // Blind faith ...
614 new_node->details->unclean = FALSE;
615 }
616
617 /* We need to be able to determine if a node's status section
618 * exists or not separate from whether the node is unclean. */
619 new_node->details->unseen = TRUE;
620}
621
622gboolean
624{
625 xmlNode *xml_obj = NULL;
626 pcmk_node_t *new_node = NULL;
627 const char *id = NULL;
628 const char *uname = NULL;
629 const char *type = NULL;
630 const char *score = NULL;
631
632 for (xml_obj = pcmk__xe_first_child(xml_nodes, NULL, NULL, NULL);
633 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
634
635 if (pcmk__xe_is(xml_obj, PCMK_XE_NODE)) {
636 new_node = NULL;
637
638 id = crm_element_value(xml_obj, PCMK_XA_ID);
641 score = crm_element_value(xml_obj, PCMK_XA_SCORE);
642 crm_trace("Processing node %s/%s", uname, id);
643
644 if (id == NULL) {
645 pcmk__config_err("Ignoring <" PCMK_XE_NODE
646 "> entry in configuration without id");
647 continue;
648 }
649 new_node = pe_create_node(id, uname, type, score, scheduler);
650
651 if (new_node == NULL) {
652 return FALSE;
653 }
654
655 handle_startup_fencing(scheduler, new_node);
656
657 add_node_attrs(xml_obj, new_node, FALSE, scheduler);
658
659 crm_trace("Done with node %s",
661 }
662 }
663
666 crm_info("Creating a fake local node");
668 scheduler);
669 }
670
671 return TRUE;
672}
673
674static void
675setup_container(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
676{
677 const char *container_id = NULL;
678
679 if (rsc->children) {
680 g_list_foreach(rsc->children, (GFunc) setup_container, scheduler);
681 return;
682 }
683
684 container_id = g_hash_table_lookup(rsc->meta, PCMK__META_CONTAINER);
685 if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) {
687 container_id);
688
689 if (container) {
690 rsc->container = container;
692 container->fillers = g_list_append(container->fillers, rsc);
693 pcmk__rsc_trace(rsc, "Resource %s's container is %s",
694 rsc->id, container_id);
695 } else {
696 pcmk__config_err("Resource %s: Unknown resource container (%s)",
697 rsc->id, container_id);
698 }
699 }
700}
701
702gboolean
704{
705 xmlNode *xml_obj = NULL;
706
707 /* Create remote nodes and guest nodes from the resource configuration
708 * before unpacking resources.
709 */
710 for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
711 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
712
713 const char *new_node_id = NULL;
714
715 /* Check for remote nodes, which are defined by ocf:pacemaker:remote
716 * primitives.
717 */
718 if (xml_contains_remote_node(xml_obj)) {
719 new_node_id = pcmk__xe_id(xml_obj);
720 /* The pcmk_find_node() check ensures we don't iterate over an
721 * expanded node that has already been added to the node list
722 */
723 if (new_node_id
724 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
725 crm_trace("Found remote node %s defined by resource %s",
726 new_node_id, pcmk__xe_id(xml_obj));
727 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
728 NULL, scheduler);
729 }
730 continue;
731 }
732
733 /* Check for guest nodes, which are defined by special meta-attributes
734 * of a primitive of any type (for example, VirtualDomain or Xen).
735 */
736 if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) {
737 /* This will add an ocf:pacemaker:remote primitive to the
738 * configuration for the guest node's connection, to be unpacked
739 * later.
740 */
741 new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
742 scheduler);
743 if (new_node_id
744 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
745 crm_trace("Found guest node %s in resource %s",
746 new_node_id, pcmk__xe_id(xml_obj));
747 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
748 NULL, scheduler);
749 }
750 continue;
751 }
752
753 /* Check for guest nodes inside a group. Clones are currently not
754 * supported as guest nodes.
755 */
756 if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) {
757 xmlNode *xml_obj2 = NULL;
758 for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
759 xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) {
760
761 new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
762 scheduler);
763
764 if (new_node_id
765 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
766 crm_trace("Found guest node %s in resource %s inside group %s",
767 new_node_id, pcmk__xe_id(xml_obj2),
768 pcmk__xe_id(xml_obj));
769 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
770 NULL, scheduler);
771 }
772 }
773 }
774 }
775 return TRUE;
776}
777
778/* Call this after all the nodes and resources have been
779 * unpacked, but before the status section is read.
780 *
781 * A remote node's online status is reflected by the state
782 * of the remote node's connection resource. We need to link
783 * the remote node to this connection resource so we can have
784 * easy access to the connection resource during the scheduler calculations.
785 */
786static void
787link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
788{
789 pcmk_node_t *remote_node = NULL;
790
791 if (new_rsc->is_remote_node == FALSE) {
792 return;
793 }
794
796 /* remote_nodes and remote_resources are not linked in quick location calculations */
797 return;
798 }
799
800 remote_node = pcmk_find_node(scheduler, new_rsc->id);
801 CRM_CHECK(remote_node != NULL, return);
802
803 pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
804 new_rsc->id, pcmk__node_name(remote_node));
805 remote_node->details->remote_rsc = new_rsc;
806
807 if (new_rsc->container == NULL) {
808 /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
809 * the same as is done for cluster nodes.
810 */
811 handle_startup_fencing(scheduler, remote_node);
812
813 } else {
814 /* pe_create_node() marks the new node as "remote" or "cluster"; now
815 * that we know the node is a guest node, update it correctly.
816 */
817 pcmk__insert_dup(remote_node->details->attrs,
818 CRM_ATTR_KIND, "container");
819 }
820}
821
822static void
823destroy_tag(gpointer data)
824{
825 pcmk_tag_t *tag = data;
826
827 if (tag) {
828 free(tag->id);
829 g_list_free_full(tag->refs, free);
830 free(tag);
831 }
832}
833
846gboolean
847unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
848{
849 xmlNode *xml_obj = NULL;
850 GList *gIter = NULL;
851
852 scheduler->template_rsc_sets = pcmk__strkey_table(free, destroy_tag);
853
854 for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
855 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
856
857 pcmk_resource_t *new_rsc = NULL;
858 const char *id = pcmk__xe_id(xml_obj);
859
860 if (pcmk__str_empty(id)) {
861 pcmk__config_err("Ignoring <%s> resource without ID",
862 xml_obj->name);
863 continue;
864 }
865
866 if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) {
867 if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id,
868 NULL, NULL) == FALSE) {
869 /* Record the template's ID for the knowledge of its existence anyway. */
871 }
872 continue;
873 }
874
875 crm_trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id);
876 if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
877 scheduler) == pcmk_rc_ok) {
878 scheduler->resources = g_list_append(scheduler->resources, new_rsc);
879 pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
880
881 } else {
882 pcmk__config_err("Ignoring <%s> resource '%s' "
883 "because configuration is invalid",
884 xml_obj->name, id);
885 }
886 }
887
888 for (gIter = scheduler->resources; gIter != NULL; gIter = gIter->next) {
889 pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
890
891 setup_container(rsc, scheduler);
892 link_rsc2remotenode(scheduler, rsc);
893 }
894
895 scheduler->resources = g_list_sort(scheduler->resources,
898 /* Ignore */
899
902
903 pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
904 pcmk__config_err("Either configure some or disable STONITH with the "
905 PCMK_OPT_STONITH_ENABLED " option");
906 pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
907 }
908
909 return TRUE;
910}
911
912gboolean
914{
915 xmlNode *xml_tag = NULL;
916
917 scheduler->tags = pcmk__strkey_table(free, destroy_tag);
918
919 for (xml_tag = pcmk__xe_first_child(xml_tags, NULL, NULL, NULL);
920 xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) {
921
922 xmlNode *xml_obj_ref = NULL;
923 const char *tag_id = pcmk__xe_id(xml_tag);
924
925 if (!pcmk__xe_is(xml_tag, PCMK_XE_TAG)) {
926 continue;
927 }
928
929 if (tag_id == NULL) {
930 pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID,
931 (const char *) xml_tag->name);
932 continue;
933 }
934
935 for (xml_obj_ref = pcmk__xe_first_child(xml_tag, NULL, NULL, NULL);
936 xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {
937
938 const char *obj_ref = pcmk__xe_id(xml_obj_ref);
939
940 if (!pcmk__xe_is(xml_obj_ref, PCMK_XE_OBJ_REF)) {
941 continue;
942 }
943
944 if (obj_ref == NULL) {
945 pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID,
946 xml_obj_ref->name, tag_id);
947 continue;
948 }
949
950 if (add_tag_ref(scheduler->tags, tag_id, obj_ref) == FALSE) {
951 return FALSE;
952 }
953 }
954 }
955
956 return TRUE;
957}
958
959/* The ticket state section:
960 * "/cib/status/tickets/ticket_state" */
961static gboolean
962unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler)
963{
964 const char *ticket_id = NULL;
965 const char *granted = NULL;
966 const char *last_granted = NULL;
967 const char *standby = NULL;
968 xmlAttrPtr xIter = NULL;
969
970 pcmk_ticket_t *ticket = NULL;
971
972 ticket_id = pcmk__xe_id(xml_ticket);
973 if (pcmk__str_empty(ticket_id)) {
974 return FALSE;
975 }
976
977 crm_trace("Processing ticket state for %s", ticket_id);
978
979 ticket = g_hash_table_lookup(scheduler->tickets, ticket_id);
980 if (ticket == NULL) {
981 ticket = ticket_new(ticket_id, scheduler);
982 if (ticket == NULL) {
983 return FALSE;
984 }
985 }
986
987 for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
988 const char *prop_name = (const char *)xIter->name;
989 const char *prop_value = pcmk__xml_attr_value(xIter);
990
991 if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) {
992 continue;
993 }
994 pcmk__insert_dup(ticket->state, prop_name, prop_value);
995 }
996
997 granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED);
998 if (granted && crm_is_true(granted)) {
999 ticket->granted = TRUE;
1000 crm_info("We have ticket '%s'", ticket->id);
1001 } else {
1002 ticket->granted = FALSE;
1003 crm_info("We do not have ticket '%s'", ticket->id);
1004 }
1005
1006 last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED);
1007 if (last_granted) {
1008 long long last_granted_ll;
1009
1010 pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
1011 ticket->last_granted = (time_t) last_granted_ll;
1012 }
1013
1014 standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY);
1015 if (standby && crm_is_true(standby)) {
1016 ticket->standby = TRUE;
1017 if (ticket->granted) {
1018 crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
1019 }
1020 } else {
1021 ticket->standby = FALSE;
1022 }
1023
1024 crm_trace("Done with ticket state for %s", ticket_id);
1025
1026 return TRUE;
1027}
1028
1029static gboolean
1030unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler)
1031{
1032 xmlNode *xml_obj = NULL;
1033
1034 for (xml_obj = pcmk__xe_first_child(xml_tickets, NULL, NULL, NULL);
1035 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
1036
1037 if (!pcmk__xe_is(xml_obj, PCMK__XE_TICKET_STATE)) {
1038 continue;
1039 }
1040 unpack_ticket_state(xml_obj, scheduler);
1041 }
1042
1043 return TRUE;
1044}
1045
1046static void
1047unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
1049{
1050 const char *discovery = NULL;
1051 const xmlNode *attrs = NULL;
1052 pcmk_resource_t *rsc = NULL;
1053
1054 if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1055 return;
1056 }
1057
1058 if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
1059 return;
1060 }
1061 crm_trace("Processing Pacemaker Remote node %s",
1062 pcmk__node_name(this_node));
1063
1065 &(this_node->details->remote_maintenance), 0);
1066
1067 rsc = this_node->details->remote_rsc;
1068 if (this_node->details->remote_requires_reset == FALSE) {
1069 this_node->details->unclean = FALSE;
1070 this_node->details->unseen = FALSE;
1071 }
1073 NULL);
1074 add_node_attrs(attrs, this_node, TRUE, scheduler);
1075
1076 if (pe__shutdown_requested(this_node)) {
1077 crm_info("%s is shutting down", pcmk__node_name(this_node));
1078 this_node->details->shutdown = TRUE;
1079 }
1080
1083 crm_info("%s is in standby mode", pcmk__node_name(this_node));
1084 this_node->details->standby = TRUE;
1085 }
1086
1089 || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed))) {
1090 crm_info("%s is in maintenance mode", pcmk__node_name(this_node));
1091 this_node->details->maintenance = TRUE;
1092 }
1093
1094 discovery = pcmk__node_attr(this_node,
1097 if ((discovery != NULL) && !crm_is_true(discovery)) {
1099 "Support for the "
1101 " node attribute is deprecated and will be removed"
1102 " (and behave as 'true') in a future release.");
1103
1104 if (pcmk__is_remote_node(this_node)
1106 pcmk__config_warn("Ignoring "
1108 " attribute on Pacemaker Remote node %s"
1109 " because fencing is disabled",
1110 pcmk__node_name(this_node));
1111 } else {
1112 /* This is either a remote node with fencing enabled, or a guest
1113 * node. We don't care whether fencing is enabled when fencing guest
1114 * nodes, because they are "fenced" by recovering their containing
1115 * resource.
1116 */
1117 crm_info("%s has resource discovery disabled",
1118 pcmk__node_name(this_node));
1119 this_node->details->rsc_discovery_enabled = FALSE;
1120 }
1121 }
1122}
1123
1132static void
1133unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
1135{
1136 const char *discovery = NULL;
1137 const xmlNode *attrs = pcmk__xe_first_child(state,
1139 NULL, NULL);
1140
1141 add_node_attrs(attrs, node, TRUE, scheduler);
1142
1145 crm_info("%s is in standby mode", pcmk__node_name(node));
1146 node->details->standby = TRUE;
1147 }
1148
1151 crm_info("%s is in maintenance mode", pcmk__node_name(node));
1152 node->details->maintenance = TRUE;
1153 }
1154
1155 discovery = pcmk__node_attr(node,
1158 if ((discovery != NULL) && !crm_is_true(discovery)) {
1159 pcmk__config_warn("Ignoring "
1161 " attribute for %s because disabling resource"
1162 " discovery is not allowed for cluster nodes",
1163 pcmk__node_name(node));
1164 }
1165}
1166
1179static void
1180unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
1181{
1182 const char *id = NULL;
1183 const char *uname = NULL;
1184 pcmk_node_t *this_node = NULL;
1185
1186 id = crm_element_value(state, PCMK_XA_ID);
1187 if (id == NULL) {
1188 pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without "
1189 PCMK_XA_ID);
1190 crm_log_xml_info(state, "missing-id");
1191 return;
1192 }
1193
1195 if (uname == NULL) {
1196 /* If a joining peer makes the cluster acquire the quorum from corosync
1197 * meanwhile it has not joined CPG membership of pacemaker-controld yet,
1198 * it's possible that the created PCMK__XE_NODE_STATE entry doesn't have
1199 * a PCMK_XA_UNAME yet. We should recognize the node as `pending` and
1200 * wait for it to join CPG.
1201 */
1202 crm_trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" "
1203 "without " PCMK_XA_UNAME,
1204 id);
1205 }
1206
1207 this_node = pe_find_node_any(scheduler->nodes, id, uname);
1208 if (this_node == NULL) {
1209 crm_notice("Ignoring recorded state for removed node with name %s and "
1210 PCMK_XA_ID " %s", pcmk__s(uname, "unknown"), id);
1211 return;
1212 }
1213
1214 if (pcmk__is_pacemaker_remote_node(this_node)) {
1215 /* We can't determine the online status of Pacemaker Remote nodes until
1216 * after all resource history has been unpacked. In this first pass, we
1217 * do need to mark whether the node has been fenced, as this plays a
1218 * role during unpacking cluster node resource state.
1219 */
1221 &(this_node->details->remote_was_fenced), 0);
1222 return;
1223 }
1224
1225 unpack_transient_attributes(state, this_node, scheduler);
1226
1227 /* Provisionally mark this cluster node as clean. We have at least seen it
1228 * in the current cluster's lifetime.
1229 */
1230 this_node->details->unclean = FALSE;
1231 this_node->details->unseen = FALSE;
1232
1233 crm_trace("Determining online status of cluster node %s (id %s)",
1234 pcmk__node_name(this_node), id);
1235 determine_online_status(state, this_node, scheduler);
1236
1238 && this_node->details->online
1240 /* Everything else should flow from this automatically
1241 * (at least until the scheduler becomes able to migrate off
1242 * healthy resources)
1243 */
1244 pe_fence_node(scheduler, this_node, "cluster does not have quorum",
1245 FALSE);
1246 }
1247}
1248
1266static int
1267unpack_node_history(const xmlNode *status, bool fence,
1269{
1270 int rc = pcmk_rc_ok;
1271
1272 // Loop through all PCMK__XE_NODE_STATE entries in CIB status
1273 for (const xmlNode *state = pcmk__xe_first_child(status,
1274 PCMK__XE_NODE_STATE, NULL,
1275 NULL);
1276 state != NULL; state = pcmk__xe_next_same(state)) {
1277
1278 const char *id = pcmk__xe_id(state);
1279 const char *uname = crm_element_value(state, PCMK_XA_UNAME);
1280 pcmk_node_t *this_node = NULL;
1281
1282 if ((id == NULL) || (uname == NULL)) {
1283 // Warning already logged in first pass through status section
1284 crm_trace("Not unpacking resource history from malformed "
1285 PCMK__XE_NODE_STATE " without id and/or uname");
1286 continue;
1287 }
1288
1289 this_node = pe_find_node_any(scheduler->nodes, id, uname);
1290 if (this_node == NULL) {
1291 // Warning already logged in first pass through status section
1292 crm_trace("Not unpacking resource history for node %s because "
1293 "no longer in configuration", id);
1294 continue;
1295 }
1296
1297 if (this_node->details->unpacked) {
1298 crm_trace("Not unpacking resource history for node %s because "
1299 "already unpacked", id);
1300 continue;
1301 }
1302
1303 if (fence) {
1304 // We're processing all remaining nodes
1305
1306 } else if (pcmk__is_guest_or_bundle_node(this_node)) {
1307 /* We can unpack a guest node's history only after we've unpacked
1308 * other resource history to the point that we know that the node's
1309 * connection and containing resource are both up.
1310 */
1311 pcmk_resource_t *rsc = this_node->details->remote_rsc;
1312
1313 if ((rsc == NULL) || (rsc->role != pcmk_role_started)
1314 || (rsc->container->role != pcmk_role_started)) {
1315 crm_trace("Not unpacking resource history for guest node %s "
1316 "because container and connection are not known to "
1317 "be up", id);
1318 continue;
1319 }
1320
1321 } else if (pcmk__is_remote_node(this_node)) {
1322 /* We can unpack a remote node's history only after we've unpacked
1323 * other resource history to the point that we know that the node's
1324 * connection is up, with the exception of when shutdown locks are
1325 * in use.
1326 */
1327 pcmk_resource_t *rsc = this_node->details->remote_rsc;
1328
1329 if ((rsc == NULL)
1331 && (rsc->role != pcmk_role_started))) {
1332 crm_trace("Not unpacking resource history for remote node %s "
1333 "because connection is not known to be up", id);
1334 continue;
1335 }
1336
1337 /* If fencing and shutdown locks are disabled and we're not processing
1338 * unseen nodes, then we don't want to unpack offline nodes until online
1339 * nodes have been unpacked. This allows us to number active clone
1340 * instances first.
1341 */
1342 } else if (!pcmk_any_flags_set(scheduler->flags,
1345 && !this_node->details->online) {
1346 crm_trace("Not unpacking resource history for offline "
1347 "cluster node %s", id);
1348 continue;
1349 }
1350
1351 if (pcmk__is_pacemaker_remote_node(this_node)) {
1352 determine_remote_online_status(scheduler, this_node);
1353 unpack_handle_remote_attrs(this_node, state, scheduler);
1354 }
1355
1356 crm_trace("Unpacking resource history for %snode %s",
1357 (fence? "unseen " : ""), id);
1358
1359 this_node->details->unpacked = TRUE;
1360 unpack_node_lrm(this_node, state, scheduler);
1361
1362 rc = EAGAIN; // Other node histories might depend on this one
1363 }
1364 return rc;
1365}
1366
1367/* remove nodes that are down, stopping */
1368/* create positive rsc_to_node constraints between resources and the nodes they are running on */
1369/* anything else? */
1370gboolean
1372{
1373 xmlNode *state = NULL;
1374
1375 crm_trace("Beginning unpack");
1376
1377 if (scheduler->tickets == NULL) {
1379 }
1380
1381 for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL;
1382 state = pcmk__xe_next(state)) {
1383
1384 if (pcmk__xe_is(state, PCMK_XE_TICKETS)) {
1385 unpack_tickets_state((xmlNode *) state, scheduler);
1386
1387 } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1388 unpack_node_state(state, scheduler);
1389 }
1390 }
1391
1392 while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
1393 crm_trace("Another pass through node resource histories is needed");
1394 }
1395
1396 // Now catch any nodes we didn't see
1397 unpack_node_history(status,
1400 scheduler);
1401
1402 /* Now that we know where resources are, we can schedule stops of containers
1403 * with failed bundle connections
1404 */
1405 if (scheduler->stop_needed != NULL) {
1406 for (GList *item = scheduler->stop_needed; item; item = item->next) {
1407 pcmk_resource_t *container = item->data;
1408 pcmk_node_t *node = pcmk__current_node(container);
1409
1410 if (node) {
1411 stop_action(container, node, FALSE);
1412 }
1413 }
1414 g_list_free(scheduler->stop_needed);
1415 scheduler->stop_needed = NULL;
1416 }
1417
1418 /* Now that we know status of all Pacemaker Remote connections and nodes,
1419 * we can stop connections for node shutdowns, and check the online status
1420 * of remote/guest nodes that didn't have any node history to unpack.
1421 */
1422 for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
1423 pcmk_node_t *this_node = gIter->data;
1424
1425 if (!pcmk__is_pacemaker_remote_node(this_node)) {
1426 continue;
1427 }
1428 if (this_node->details->shutdown
1429 && (this_node->details->remote_rsc != NULL)) {
1431 "remote shutdown");
1432 }
1433 if (!this_node->details->unpacked) {
1434 determine_remote_online_status(scheduler, this_node);
1435 }
1436 }
1437
1438 return TRUE;
1439}
1440
1452static long long
1453unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
1454{
1455 const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM);
1456 int member = 0;
1457
1458 if (member_time == NULL) {
1459 return -1LL;
1460
1461 } else if (crm_str_to_boolean(member_time, &member) == 1) {
1462 /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
1463 * recorded as a boolean for a DC < 2.1.7, or the node is pending
1464 * shutdown and has left the CPG, in which case it was set to 1 to avoid
1465 * fencing for PCMK_OPT_NODE_PENDING_TIMEOUT.
1466 *
1467 * We return the effective time for in_ccm=1 because what's important to
1468 * avoid fencing is that effective time minus this value is less than
1469 * the pending node timeout.
1470 */
1471 return member? (long long) get_effective_time(scheduler) : 0LL;
1472
1473 } else {
1474 long long when_member = 0LL;
1475
1476 if ((pcmk__scan_ll(member_time, &when_member,
1477 0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
1478 crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM
1479 " in " PCMK__XE_NODE_STATE " entry", member_time);
1480 return -1LL;
1481 }
1482 return when_member;
1483 }
1484}
1485
1495static long long
1496unpack_node_online(const xmlNode *node_state)
1497{
1498 const char *peer_time = crm_element_value(node_state, PCMK_XA_CRMD);
1499
1500 // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
1501 if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE,
1503 return 0LL;
1504
1505 } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1506 return 1LL;
1507
1508 } else {
1509 long long when_online = 0LL;
1510
1511 if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
1512 || (when_online < 0)) {
1513 crm_warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in "
1514 PCMK__XE_NODE_STATE " entry, assuming offline", peer_time);
1515 return 0LL;
1516 }
1517 return when_online;
1518 }
1519}
1520
1530static bool
1531unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
1532{
1533 long long value = 0LL;
1534 int value_i = 0;
1535 const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE,
1537
1538 // Value may be boolean or an epoch time
1539 if (crm_str_to_boolean(value_s, &value_i) == 1) {
1540 return (value_i != 0);
1541 }
1542 if (pcmk__scan_ll(value_s, &value, 0LL) == pcmk_rc_ok) {
1543 return (value > 0);
1544 }
1545 crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
1546 "node attribute for %s", value_s, pcmk__node_name(node));
1547 return false;
1548}
1549
1550static gboolean
1551determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
1552 const xmlNode *node_state,
1553 pcmk_node_t *this_node)
1554{
1555 gboolean online = FALSE;
1556 const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1557 const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1558 long long when_member = unpack_node_member(node_state, scheduler);
1559 long long when_online = unpack_node_online(node_state);
1560
1561 if (when_member <= 0) {
1562 crm_trace("Node %s is %sdown", pcmk__node_name(this_node),
1563 ((when_member < 0)? "presumed " : ""));
1564
1565 } else if (when_online > 0) {
1566 if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1567 online = TRUE;
1568 } else {
1569 crm_debug("Node %s is not ready to run resources: %s",
1570 pcmk__node_name(this_node), join);
1571 }
1572
1573 } else if (this_node->details->expected_up == FALSE) {
1574 crm_trace("Node %s controller is down: "
1575 "member@%lld online@%lld join=%s expected=%s",
1576 pcmk__node_name(this_node), when_member, when_online,
1577 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1578
1579 } else {
1580 /* mark it unclean */
1581 pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
1582 crm_info("Node %s member@%lld online@%lld join=%s expected=%s",
1583 pcmk__node_name(this_node), when_member, when_online,
1584 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1585 }
1586 return online;
1587}
1588
1602static inline bool
1603pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1604 long long when_member, long long when_online)
1605{
1607 && (when_member > 0) && (when_online <= 0)) {
1608 // There is a timeout on pending nodes, and node is pending
1609
1610 time_t timeout = when_member + scheduler->node_pending_timeout;
1611
1612 if (get_effective_time(node->details->data_set) >= timeout) {
1613 return true; // Node has timed out
1614 }
1615
1616 // Node is pending, but still has time
1617 pe__update_recheck_time(timeout, scheduler, "pending node timeout");
1618 }
1619 return false;
1620}
1621
1622static bool
1623determine_online_status_fencing(pcmk_scheduler_t *scheduler,
1624 const xmlNode *node_state,
1625 pcmk_node_t *this_node)
1626{
1627 bool termination_requested = unpack_node_terminate(this_node, node_state);
1628 const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1629 const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1630 long long when_member = unpack_node_member(node_state, scheduler);
1631 long long when_online = unpack_node_online(node_state);
1632
1633/*
1634 - PCMK__XA_JOIN ::= member|down|pending|banned
1635 - PCMK_XA_EXPECTED ::= member|down
1636
1637 @COMPAT with entries recorded for DCs < 2.1.7
1638 - PCMK__XA_IN_CCM ::= true|false
1639 - PCMK_XA_CRMD ::= online|offline
1640
1641 Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
1642 - PCMK__XA_IN_CCM ::= <timestamp>|0
1643 Since when node has been a cluster member. A value of 0 means the node is not
1644 a cluster member.
1645
1646 - PCMK_XA_CRMD ::= <timestamp>|0
1647 Since when peer has been online in CPG. A value 0 means the peer is offline
1648 in CPG.
1649*/
1650
1651 crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
1652 pcmk__node_name(this_node), when_member, when_online,
1653 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
1654 (termination_requested? " (termination requested)" : ""));
1655
1656 if (this_node->details->shutdown) {
1657 crm_debug("%s is shutting down", pcmk__node_name(this_node));
1658
1659 /* Slightly different criteria since we can't shut down a dead peer */
1660 return (when_online > 0);
1661 }
1662
1663 if (when_member < 0) {
1664 pe_fence_node(scheduler, this_node,
1665 "peer has not been seen by the cluster", FALSE);
1666 return false;
1667 }
1668
1669 if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
1670 pe_fence_node(scheduler, this_node,
1671 "peer failed Pacemaker membership criteria", FALSE);
1672
1673 } else if (termination_requested) {
1674 if ((when_member <= 0) && (when_online <= 0)
1675 && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
1676 crm_info("%s was fenced as requested", pcmk__node_name(this_node));
1677 return false;
1678 }
1679 pe_fence_node(scheduler, this_node, "fencing was requested", false);
1680
1681 } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
1683
1684 if (pending_too_long(scheduler, this_node, when_member, when_online)) {
1685 pe_fence_node(scheduler, this_node,
1686 "peer pending timed out on joining the process group",
1687 FALSE);
1688
1689 } else if ((when_member > 0) || (when_online > 0)) {
1690 crm_info("- %s is not ready to run resources",
1691 pcmk__node_name(this_node));
1692 this_node->details->standby = TRUE;
1693 this_node->details->pending = TRUE;
1694
1695 } else {
1696 crm_trace("%s is down or still coming up",
1697 pcmk__node_name(this_node));
1698 }
1699
1700 } else if (when_member <= 0) {
1701 // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
1702 pe_fence_node(scheduler, this_node,
1703 "peer is no longer part of the cluster", TRUE);
1704
1705 } else if (when_online <= 0) {
1706 pe_fence_node(scheduler, this_node,
1707 "peer process is no longer available", FALSE);
1708
1709 /* Everything is running at this point, now check join state */
1710
1711 } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
1712 crm_info("%s is active", pcmk__node_name(this_node));
1713
1715 CRMD_JOINSTATE_DOWN, NULL)) {
1716 crm_info("%s is not ready to run resources",
1717 pcmk__node_name(this_node));
1718 this_node->details->standby = TRUE;
1719 this_node->details->pending = TRUE;
1720
1721 } else {
1722 pe_fence_node(scheduler, this_node, "peer was in an unknown state",
1723 FALSE);
1724 }
1725
1726 return (when_member > 0);
1727}
1728
1729static void
1730determine_remote_online_status(pcmk_scheduler_t *scheduler,
1731 pcmk_node_t *this_node)
1732{
1733 pcmk_resource_t *rsc = this_node->details->remote_rsc;
1734 pcmk_resource_t *container = NULL;
1735 pcmk_node_t *host = NULL;
1736
1737 /* If there is a node state entry for a (former) Pacemaker Remote node
1738 * but no resource creating that node, the node's connection resource will
1739 * be NULL. Consider it an offline remote node in that case.
1740 */
1741 if (rsc == NULL) {
1742 this_node->details->online = FALSE;
1743 goto remote_online_done;
1744 }
1745
1746 container = rsc->container;
1747
1748 if (container && pcmk__list_of_1(rsc->running_on)) {
1749 host = rsc->running_on->data;
1750 }
1751
1752 /* If the resource is currently started, mark it online. */
1753 if (rsc->role == pcmk_role_started) {
1754 crm_trace("%s node %s presumed ONLINE because connection resource is started",
1755 (container? "Guest" : "Remote"), this_node->details->id);
1756 this_node->details->online = TRUE;
1757 }
1758
1759 /* consider this node shutting down if transitioning start->stop */
1760 if ((rsc->role == pcmk_role_started)
1761 && (rsc->next_role == pcmk_role_stopped)) {
1762
1763 crm_trace("%s node %s shutting down because connection resource is stopping",
1764 (container? "Guest" : "Remote"), this_node->details->id);
1765 this_node->details->shutdown = TRUE;
1766 }
1767
1768 /* Now check all the failure conditions. */
1769 if(container && pcmk_is_set(container->flags, pcmk_rsc_failed)) {
1770 crm_trace("Guest node %s UNCLEAN because guest resource failed",
1771 this_node->details->id);
1772 this_node->details->online = FALSE;
1773 this_node->details->remote_requires_reset = TRUE;
1774
1775 } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
1776 crm_trace("%s node %s OFFLINE because connection resource failed",
1777 (container? "Guest" : "Remote"), this_node->details->id);
1778 this_node->details->online = FALSE;
1779
1780 } else if ((rsc->role == pcmk_role_stopped)
1781 || ((container != NULL)
1782 && (container->role == pcmk_role_stopped))) {
1783
1784 crm_trace("%s node %s OFFLINE because its resource is stopped",
1785 (container? "Guest" : "Remote"), this_node->details->id);
1786 this_node->details->online = FALSE;
1787 this_node->details->remote_requires_reset = FALSE;
1788
1789 } else if (host && (host->details->online == FALSE)
1790 && host->details->unclean) {
1791 crm_trace("Guest node %s UNCLEAN because host is unclean",
1792 this_node->details->id);
1793 this_node->details->online = FALSE;
1794 this_node->details->remote_requires_reset = TRUE;
1795 }
1796
1797remote_online_done:
1798 crm_trace("Remote node %s online=%s",
1799 this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
1800}
1801
1802static void
1803determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
1805{
1806 gboolean online = FALSE;
1807 const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1808
1809 CRM_CHECK(this_node != NULL, return);
1810
1811 this_node->details->shutdown = FALSE;
1812 this_node->details->expected_up = FALSE;
1813
1814 if (pe__shutdown_requested(this_node)) {
1815 this_node->details->shutdown = TRUE;
1816
1817 } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1818 this_node->details->expected_up = TRUE;
1819 }
1820
1821 if (this_node->details->type == node_ping) {
1822 this_node->details->unclean = FALSE;
1823 online = FALSE; /* As far as resource management is concerned,
1824 * the node is safely offline.
1825 * Anyone caught abusing this logic will be shot
1826 */
1827
1829 online = determine_online_status_no_fencing(scheduler, node_state,
1830 this_node);
1831
1832 } else {
1833 online = determine_online_status_fencing(scheduler, node_state,
1834 this_node);
1835 }
1836
1837 if (online) {
1838 this_node->details->online = TRUE;
1839
1840 } else {
1841 /* remove node from contention */
1842 this_node->fixed = TRUE; // @COMPAT deprecated and unused
1843 this_node->weight = -PCMK_SCORE_INFINITY;
1844 }
1845
1846 if (online && this_node->details->shutdown) {
1847 /* don't run resources here */
1848 this_node->fixed = TRUE; // @COMPAT deprecated and unused
1849 this_node->weight = -PCMK_SCORE_INFINITY;
1850 }
1851
1852 if (this_node->details->type == node_ping) {
1853 crm_info("%s is not a Pacemaker node", pcmk__node_name(this_node));
1854
1855 } else if (this_node->details->unclean) {
1856 pcmk__sched_warn("%s is unclean", pcmk__node_name(this_node));
1857
1858 } else if (this_node->details->online) {
1859 crm_info("%s is %s", pcmk__node_name(this_node),
1860 this_node->details->shutdown ? "shutting down" :
1861 this_node->details->pending ? "pending" :
1862 this_node->details->standby ? "standby" :
1863 this_node->details->maintenance ? "maintenance" : "online");
1864
1865 } else {
1866 crm_trace("%s is offline", pcmk__node_name(this_node));
1867 }
1868}
1869
/*!
 * \brief Find the last character of a resource ID's base name
 *
 * Trailing digits are scanned backward from the end of the ID (the first
 * character is never examined). If the scan stops at a colon, the character
 * before the colon is returned, unless the colon is itself the final character,
 * in which case the colon is returned. Otherwise the final character of the ID
 * is returned.
 *
 * \param[in] id  Resource ID to check
 *
 * \return Pointer into \p id at the last character of the base name, or NULL
 *         if \p id is NULL or empty
 */
const char *
pe_base_name_end(const char *id)
{
    const char *last = NULL;

    if (pcmk__str_empty(id)) {
        return NULL;
    }
    last = id + strlen(id) - 1;

    for (const char *pos = last; pos > id; --pos) {
        if ((*pos >= '0') && (*pos <= '9')) {
            continue;   // Still within a possible numeric suffix
        }
        if (*pos != ':') {
            return last;    // No instance suffix, so the whole ID is the base
        }
        return (pos == last)? pos : (pos - 1);
    }
    return last;
}
1907
/*!
 * \brief Get a resource ID's base name (without any instance suffix)
 *
 * \param[in] last_rsc_id  Resource ID to copy the base name from
 *
 * \return Newly allocated copy of \p last_rsc_id up to and including the
 *         character found by pe_base_name_end()
 * \note Asserts if no base name end can be found or on allocation failure.
 *       The caller is responsible for freeing the result with free().
 */
char *
clone_strip(const char *last_rsc_id)
{
    const char *base_end = pe_base_name_end(last_rsc_id);
    char *stripped = NULL;

    CRM_ASSERT(base_end);

    // base_end points at the last base-name character, so it is included
    stripped = strndup(last_rsc_id, base_end - last_rsc_id + 1);
    CRM_ASSERT(stripped);

    return stripped;
}
1929
/*!
 * \brief Get the ID of the first (":0") instance for a resource ID
 *
 * \param[in] last_rsc_id  Resource ID whose base name should be used
 *
 * \return Newly allocated string with the base name of \p last_rsc_id followed
 *         by ":0"
 * \note Asserts if no base name end can be found. The caller is responsible
 *       for freeing the result with free().
 */
char *
clone_zero(const char *last_rsc_id)
{
    const char *end = pe_base_name_end(last_rsc_id);
    size_t base_name_len = 0;
    char *zero = NULL;

    /* Validate end before using it in pointer arithmetic; subtracting from a
     * NULL pointer would be undefined behavior.
     */
    CRM_ASSERT(end);
    base_name_len = end - last_rsc_id + 1;

    // Room for the base name, ':', '0', and the string terminator
    zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char));
    memcpy(zero, last_rsc_id, base_name_len);
    zero[base_name_len] = ':';
    zero[base_name_len + 1] = '0';
    zero[base_name_len + 2] = '\0'; // Don't rely on the allocator zero-filling
    return zero;
}
1954
1955static pcmk_resource_t *
1956create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
1958{
1959 pcmk_resource_t *rsc = NULL;
1960 xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);
1961
1962 pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none);
1963 crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id);
1964 crm_log_xml_debug(xml_rsc, "Orphan resource");
1965
1966 if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
1967 return NULL;
1968 }
1969
1970 if (xml_contains_remote_node(xml_rsc)) {
1971 pcmk_node_t *node;
1972
1973 crm_debug("Detected orphaned remote node %s", rsc_id);
1974 node = pcmk_find_node(scheduler, rsc_id);
1975 if (node == NULL) {
1976 node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, NULL,
1977 scheduler);
1978 }
1979 link_rsc2remotenode(scheduler, rsc);
1980
1981 if (node) {
1982 crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
1983 node->details->shutdown = TRUE;
1984 }
1985 }
1986
1987 if (crm_element_value(rsc_entry, PCMK__META_CONTAINER)) {
1988 /* This orphaned rsc needs to be mapped to a container. */
1989 crm_trace("Detected orphaned container filler %s", rsc_id);
1991 }
1993 scheduler->resources = g_list_append(scheduler->resources, rsc);
1994 return rsc;
1995}
1996
2008static pcmk_resource_t *
2009create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id,
2011{
2013
2014 // find_rsc() because we might be a cloned group
2015 pcmk_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL,
2017
2018 pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s",
2019 top->id, parent->id, rsc_id, pcmk__node_name(node));
2020 return orphan;
2021}
2022
2038static pcmk_resource_t *
2039find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2040 pcmk_resource_t *parent, const char *rsc_id)
2041{
2042 GList *rIter = NULL;
2043 pcmk_resource_t *rsc = NULL;
2044 pcmk_resource_t *inactive_instance = NULL;
2045 gboolean skip_inactive = FALSE;
2046
2047 CRM_ASSERT(pcmk__is_anonymous_clone(parent));
2048
2049 // Check for active (or partially active, for cloned groups) instance
2050 pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
2051 rsc_id, pcmk__node_name(node), parent->id);
2052 for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
2053 GList *locations = NULL;
2054 pcmk_resource_t *child = rIter->data;
2055
2056 /* Check whether this instance is already known to be active or pending
2057 * anywhere, at this stage of unpacking. Because this function is called
2058 * for a resource before the resource's individual operation history
2059 * entries are unpacked, locations will generally not contain the
2060 * desired node.
2061 *
2062 * However, there are three exceptions:
2063 * (1) when child is a cloned group and we have already unpacked the
2064 * history of another member of the group on the same node;
2065 * (2) when we've already unpacked the history of another numbered
2066 * instance on the same node (which can happen if
2067 * PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
2068 * (3) when we re-run calculations on the same scheduler data as part of
2069 * a simulation.
2070 */
2071 child->fns->location(child, &locations, 2);
2072 if (locations) {
2073 /* We should never associate the same numbered anonymous clone
2074 * instance with multiple nodes, and clone instances can't migrate,
2075 * so there must be only one location, regardless of history.
2076 */
2077 CRM_LOG_ASSERT(locations->next == NULL);
2078
2079 if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
2080 /* This child instance is active on the requested node, so check
2081 * for a corresponding configured resource. We use find_rsc()
2082 * instead of child because child may be a cloned group, and we
2083 * need the particular member corresponding to rsc_id.
2084 *
2085 * If the history entry is orphaned, rsc will be NULL.
2086 */
2087 rsc = parent->fns->find_rsc(child, rsc_id, NULL,
2089 if (rsc) {
2090 /* If there are multiple instance history entries for an
2091 * anonymous clone in a single node's history (which can
2092 * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true
2093 * to false), we want to consider the instances beyond the
2094 * first as orphans, even if there are inactive instance
2095 * numbers available.
2096 */
2097 if (rsc->running_on) {
2098 crm_notice("Active (now-)anonymous clone %s has "
2099 "multiple (orphan) instance histories on %s",
2100 parent->id, pcmk__node_name(node));
2101 skip_inactive = TRUE;
2102 rsc = NULL;
2103 } else {
2104 pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
2105 }
2106 }
2107 }
2108 g_list_free(locations);
2109
2110 } else {
2111 pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
2112 if (!skip_inactive && !inactive_instance
2113 && !pcmk_is_set(child->flags, pcmk_rsc_blocked)) {
2114 // Remember one inactive instance in case we don't find active
2115 inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
2117
2118 /* ... but don't use it if it was already associated with a
2119 * pending action on another node
2120 */
2121 if ((inactive_instance != NULL) &&
2122 (inactive_instance->pending_node != NULL) &&
2123 !pcmk__same_node(inactive_instance->pending_node, node)) {
2124 inactive_instance = NULL;
2125 }
2126 }
2127 }
2128 }
2129
2130 if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2131 pcmk__rsc_trace(parent, "Resource %s, empty slot",
2132 inactive_instance->id);
2133 rsc = inactive_instance;
2134 }
2135
2136 /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
2137 * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we
2138 * don't want to consume a valid instance number for unclean nodes. Such
2139 * instances may appear to be active according to the history, but should be
2140 * considered inactive, so we can start an instance elsewhere. Treat such
2141 * instances as orphans.
2142 *
2143 * An exception is instances running on guest nodes -- since guest node
2144 * "fencing" is actually just a resource stop, requires shouldn't apply.
2145 *
2146 * @TODO Ideally, we'd use an inactive instance number if it is not needed
2147 * for any clean instances. However, we don't know that at this point.
2148 */
2149 if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)
2150 && (!node->details->online || node->details->unclean)
2151 && !pcmk__is_guest_or_bundle_node(node)
2153
2154 rsc = NULL;
2155 }
2156
2157 if (rsc == NULL) {
2158 rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
2159 pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id);
2160 }
2161 return rsc;
2162}
2163
2164static pcmk_resource_t *
2165unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2166 const char *rsc_id)
2167{
2168 pcmk_resource_t *rsc = NULL;
2169 pcmk_resource_t *parent = NULL;
2170
2171 crm_trace("looking for %s", rsc_id);
2172 rsc = pe_find_resource(scheduler->resources, rsc_id);
2173
2174 if (rsc == NULL) {
2175 /* If we didn't find the resource by its name in the operation history,
2176 * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
2177 * we create a single :0 orphan to match against here.
2178 */
2179 char *clone0_id = clone_zero(rsc_id);
2181 clone0_id);
2182
2183 if (clone0 && !pcmk_is_set(clone0->flags, pcmk_rsc_unique)) {
2184 rsc = clone0;
2185 parent = uber_parent(clone0);
2186 crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
2187 } else {
2188 crm_trace("%s is not known as %s either (orphan)",
2189 rsc_id, clone0_id);
2190 }
2191 free(clone0_id);
2192
2193 } else if (rsc->variant > pcmk_rsc_variant_primitive) {
2194 crm_trace("Resource history for %s is orphaned because it is no longer primitive",
2195 rsc_id);
2196 return NULL;
2197
2198 } else {
2199 parent = uber_parent(rsc);
2200 }
2201
2202 if (pcmk__is_anonymous_clone(parent)) {
2203
2204 if (pcmk__is_bundled(parent)) {
2205 rsc = pe__find_bundle_replica(parent->parent, node);
2206 } else {
2207 char *base = clone_strip(rsc_id);
2208
2209 rsc = find_anonymous_clone(scheduler, node, parent, base);
2210 free(base);
2211 CRM_ASSERT(rsc != NULL);
2212 }
2213 }
2214
2215 if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none)
2216 && !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_none)) {
2217
2218 pcmk__str_update(&rsc->clone_name, rsc_id);
2219 pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
2220 rsc_id, pcmk__node_name(node), rsc->id,
2221 pcmk_is_set(rsc->flags, pcmk_rsc_removed)? " (ORPHAN)" : "");
2222 }
2223 return rsc;
2224}
2225
2226static pcmk_resource_t *
2227process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
2229{
2230 pcmk_resource_t *rsc = NULL;
2231 const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2232
2233 crm_debug("Detected orphan resource %s on %s",
2234 rsc_id, pcmk__node_name(node));
2235 rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
2236 if (rsc == NULL) {
2237 return NULL;
2238 }
2239
2242
2243 } else {
2244 CRM_CHECK(rsc != NULL, return NULL);
2245 pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id);
2247 "__orphan_do_not_run__", scheduler);
2248 }
2249 return rsc;
2250}
2251
/*!
 * \internal
 * \brief Apply a resource's unpacked state on a node to the scheduler data
 *
 * In order, this: records the node in the known_on table of the resource (and
 * of its ancestors, see below) when the role is known; may call
 * pe_fence_node() when the resource is believed active on an offline node;
 * applies the given on-fail response; and finally either calls
 * native_add_running() for a resource that is neither stopped nor unknown, or
 * performs clean-up for the remaining cases.
 *
 * \param[in,out] rsc      Resource whose state is being processed (asserted
 *                         to be non-NULL)
 * \param[in,out] node     Node that the state applies to
 * \param[in]     on_fail  Failure response to apply
 *
 * \note NOTE(review): several source lines are absent from this listing
 *       (including most "case" labels of the on_fail switch and parts of some
 *       conditions). Comments here describe only what is visible.
 */
2252static void
2253process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
2254 enum action_fail_response on_fail)
2255{
2256 pcmk_node_t *tmpnode = NULL;
2257 char *reason = NULL;
2258 enum action_fail_response save_on_fail = pcmk_on_fail_ignore;
2259
2260 CRM_ASSERT(rsc);
2261 pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
2262 rsc->id, pcmk_role_text(rsc->role), pcmk__node_name(node),
2263 pcmk_on_fail_text(on_fail));
2264
2265 /* process current state */
2266 if (rsc->role != pcmk_role_unknown) {
2267 pcmk_resource_t *iter = rsc;
2268
 /* Walk from rsc up through its parents, inserting a copy of node into
  * each known_on table that lacks it. The walk ends after the first
  * resource that has pcmk_rsc_unique set, or when there is no parent.
  */
2269 while (iter) {
2270 if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
2271 pcmk_node_t *n = pe__copy_node(node);
2272
2273 pcmk__rsc_trace(rsc, "%s%s%s known on %s",
2274 rsc->id,
2275 ((rsc->clone_name == NULL)? "" : " also known as "),
2276 ((rsc->clone_name == NULL)? "" : rsc->clone_name),
2277 pcmk__node_name(n));
2278 g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
2279 }
2280 if (pcmk_is_set(iter->flags, pcmk_rsc_unique)) {
2281 break;
2282 }
2283 iter = iter->parent;
2284 }
2285 }
2286
2287 /* If a managed resource is believed to be running, but node is down ... */
 /* NOTE(review): the final line of this condition is absent from the listing */
2288 if ((rsc->role > pcmk_role_stopped)
2289 && node->details->online == FALSE
2290 && node->details->maintenance == FALSE
2292
2293 gboolean should_fence = FALSE;
2294
2295 /* If this is a guest node, fence it (regardless of whether fencing is
2296 * enabled, because guest node fencing is done by recovery of the
2297 * container resource rather than by the fencer). Mark the resource
2298 * we're processing as failed. When the guest comes back up, its
2299 * operation history in the CIB will be cleared, freeing the affected
2300 * resource to run again once we are sure we know its state.
2301 */
2302 if (pcmk__is_guest_or_bundle_node(node)) {
2304 should_fence = TRUE;
2305
2306 } else if (pcmk_is_set(rsc->cluster->flags,
2308 if (pcmk__is_remote_node(node)
2309 && (node->details->remote_rsc != NULL)
2310 && !pcmk_is_set(node->details->remote_rsc->flags,
2311 pcmk_rsc_failed)) {
2312
2313 /* Setting unseen means that fencing of the remote node will
2314 * occur only if the connection resource is not going to start
2315 * somewhere. This allows connection resources on a failed
2316 * cluster node to move to another node without requiring the
2317 * remote nodes to be fenced as well.
2318 */
2319 node->details->unseen = TRUE;
2320 reason = crm_strdup_printf("%s is active there (fencing will be"
2321 " revoked if remote connection can "
2322 "be re-established elsewhere)",
2323 rsc->id);
2324 }
2325 should_fence = TRUE;
2326 }
2327
2328 if (should_fence) {
 /* Fall back to a generic reason if none was set above */
2329 if (reason == NULL) {
2330 reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
2331 }
2332 pe_fence_node(rsc->cluster, node, reason, FALSE);
2333 }
 /* reason is either NULL or was allocated by crm_strdup_printf() above */
2334 free(reason);
2335 }
2336
2337 /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */
2338 save_on_fail = on_fail;
2339
2340 if (node->details->unclean) {
2341 /* No extra processing needed
2342 * Also allows resources to be started again after a node is shot
2343 */
2344 on_fail = pcmk_on_fail_ignore;
2345 }
2346
 /* NOTE(review): most "case" labels of this switch are absent from the
  * listing, so which response each visible branch belongs to cannot always
  * be determined here.
  */
2347 switch (on_fail) {
2349 /* nothing to do */
2350 break;
2351
2354 demote_action(rsc, node, FALSE);
2355 break;
2356
2358 /* treat it as if it is still running
2359 * but also mark the node as unclean
2360 */
2361 reason = crm_strdup_printf("%s failed there", rsc->id);
2362 pe_fence_node(rsc->cluster, node, reason, FALSE);
2363 free(reason);
2364 break;
2365
2367 node->details->standby = TRUE;
2368 node->details->standby_onfail = TRUE;
2369 break;
2370
2371 case pcmk_on_fail_block:
2372 /* is_managed == FALSE will prevent any
2373 * actions being sent for the resource
2374 */
2377 break;
2378
2379 case pcmk_on_fail_ban:
2380 /* make sure it comes up somewhere else
2381 * or not at all
2382 */
2384 "__action_migration_auto__", rsc->cluster);
2385 break;
2386
2387 case pcmk_on_fail_stop:
2390 break;
2391
2393 if ((rsc->role != pcmk_role_stopped)
2394 && (rsc->role != pcmk_role_unknown)) {
2397 stop_action(rsc, node, FALSE);
2398 }
2399 break;
2400
2403 if ((rsc->container != NULL) && pcmk__is_bundled(rsc)) {
2404 /* A bundle's remote connection can run on a different node than
2405 * the bundle's container. We don't necessarily know where the
2406 * container is running yet, so remember it and add a stop
2407 * action for it later.
2408 */
2409 rsc->cluster->stop_needed =
2410 g_list_prepend(rsc->cluster->stop_needed, rsc->container);
2411 } else if (rsc->container) {
2412 stop_action(rsc->container, node, FALSE);
2413 } else if ((rsc->role != pcmk_role_stopped)
2414 && (rsc->role != pcmk_role_unknown)) {
2415 stop_action(rsc, node, FALSE);
2416 }
2417 break;
2418
2422 tmpnode = NULL;
2423 if (rsc->is_remote_node) {
2424 tmpnode = pcmk_find_node(rsc->cluster, rsc->id);
2425 }
2426 if (pcmk__is_remote_node(tmpnode)
2427 && !(tmpnode->details->remote_was_fenced)) {
2428 /* The remote connection resource failed in a way that
2429 * should result in fencing the remote node.
2430 */
2431 pe_fence_node(rsc->cluster, tmpnode,
2432 "remote connection is unrecoverable", FALSE);
2433 }
2434 }
2435
2436 /* require the stop action regardless if fencing is occurring or not. */
2437 if (rsc->role > pcmk_role_stopped) {
2438 stop_action(rsc, node, FALSE);
2439 }
2440
2441 /* if reconnect delay is in use, prevent the connection from exiting the
2442 * "STOPPED" role until the failure is cleared by the delay timeout. */
2443 if (rsc->remote_reconnect_ms) {
2444 pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
2445 }
2446 break;
2447 }
2448
2449 /* ensure a remote-node connection failure forces an unclean remote-node
2450 * to be fenced. By setting unseen = FALSE, the remote-node failure will
2451 * result in a fencing operation regardless if we're going to attempt to
2452 * reconnect to the remote-node in this transition or not. */
2453 if (pcmk_is_set(rsc->flags, pcmk_rsc_failed) && rsc->is_remote_node) {
2454 tmpnode = pcmk_find_node(rsc->cluster, rsc->id);
2455 if (tmpnode && tmpnode->details->unclean) {
2456 tmpnode->details->unseen = FALSE;
2457 }
2458 }
2459
2460 if ((rsc->role != pcmk_role_stopped)
2461 && (rsc->role != pcmk_role_unknown)) {
2462 if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
2463 if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
2464 crm_notice("Removed resource %s is active on %s and will be "
2465 "stopped when possible",
2466 rsc->id, pcmk__node_name(node));
2467 } else {
2468 crm_notice("Removed resource %s must be stopped manually on %s "
2470 " is set to false", rsc->id, pcmk__node_name(node));
2471 }
2472 }
2473
 /* The last argument uses the on_fail value saved before it could be
  * reset to "ignore" for an unclean node
  */
2474 native_add_running(rsc, node, rsc->cluster,
2475 (save_on_fail != pcmk_on_fail_ignore));
 /* NOTE(review): the bodies of the branches below are absent from the
  * listing
  */
2476 switch (on_fail) {
2478 break;
2480 case pcmk_on_fail_block:
2482 break;
2483 default:
2486 break;
2487 }
2488
2489 } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
2490 /* Only do this for older status sections that included instance numbers
2491 * Otherwise stopped instances will appear as orphans
2492 */
2493 pcmk__rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)",
2494 rsc->clone_name, rsc->id);
2495 free(rsc->clone_name);
2496 rsc->clone_name = NULL;
2497
2498 } else {
 /* Visit each stop action returned for rsc on node (the loop body is
  * absent from the listing), then free only the list itself
  */
2499 GList *possible_matches = pe__resource_actions(rsc, node,
2500 PCMK_ACTION_STOP, FALSE);
2501 GList *gIter = possible_matches;
2502
2503 for (; gIter != NULL; gIter = gIter->next) {
2504 pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
2505
2507 }
2508
2509 g_list_free(possible_matches);
2510 }
2511
2512 /* A successful stop after migrate_to on the migration source doesn't make
2513 * the partially migrated resource stopped on the migration target.
2514 */
 /* NOTE(review): two lines of this condition are absent from the listing */
2515 if ((rsc->role == pcmk_role_stopped)
2517 && rsc->partial_migration_source->details == node->details
2519 && rsc->running_on) {
2520
2521 rsc->role = pcmk_role_started;
2522 }
2523}
2524
2525/* create active recurring operations as optional */
2526static void
2527process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
2528 int start_index, int stop_index,
2529 GList *sorted_op_list, pcmk_scheduler_t *scheduler)
2530{
2531 int counter = -1;
2532 const char *task = NULL;
2533 const char *status = NULL;
2534 GList *gIter = sorted_op_list;
2535
2536 CRM_ASSERT(rsc);
2537 pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d",
2538 rsc->id, start_index, stop_index);
2539
2540 for (; gIter != NULL; gIter = gIter->next) {
2541 xmlNode *rsc_op = (xmlNode *) gIter->data;
2542
2543 guint interval_ms = 0;
2544 char *key = NULL;
2545 const char *id = pcmk__xe_id(rsc_op);
2546
2547 counter++;
2548
2549 if (node->details->online == FALSE) {
2550 pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
2551 rsc->id, pcmk__node_name(node));
2552 break;
2553
2554 /* Need to check if there's a monitor for role="Stopped" */
2555 } else if (start_index < stop_index && counter <= stop_index) {
2556 pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
2557 id, pcmk__node_name(node));
2558 continue;
2559
2560 } else if (counter < start_index) {
2561 pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
2562 id, pcmk__node_name(node), counter);
2563 continue;
2564 }
2565
2566 crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms);
2567 if (interval_ms == 0) {
2568 pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
2569 id, pcmk__node_name(node));
2570 continue;
2571 }
2572
2573 status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2574 if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
2575 pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
2576 id, pcmk__node_name(node));
2577 continue;
2578 }
2579 task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2580 /* create the action */
2581 key = pcmk__op_key(rsc->id, task, interval_ms);
2582 pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node));
2583 custom_action(rsc, key, task, node, TRUE, scheduler);
2584 }
2585}
2586
/*!
 * \brief Find where, in a sorted list of a resource's history entries, the
 *        resource was last stopped and last made active
 *
 * Both outputs are zero-based positions in the list, or -1 if no matching
 * entry exists. If no start or migrate_from entry is found, the start index
 * falls back to the position recorded in implied_clone_start, and failing
 * that, to the position of a monitor entry with a result code of "0" or "8".
 *
 * \param[in]  sorted_op_list  History entries (xmlNode *), already sorted
 * \param[out] start_index     Where to store the index of the last start or
 *                             migrate_from entry (or the fallback above)
 * \param[out] stop_index      Where to store the index of the last stop entry
 *                             whose execution status is "0"
 */
2587void
2588calculate_active_ops(const GList *sorted_op_list, int *start_index,
2589 int *stop_index)
2590{
2591 int counter = -1;
2592 int implied_monitor_start = -1;
2593 int implied_clone_start = -1;
2594 const char *task = NULL;
2595 const char *status = NULL;
2596
2597 *stop_index = -1;
2598 *start_index = -1;
2599
2600 for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2601 const xmlNode *rsc_op = (const xmlNode *) iter->data;
2602
2603 counter++;
2604
2605 task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2606 status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2607
2608 if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
2609 && pcmk__str_eq(status, "0", pcmk__str_casei)) {
2610 *stop_index = counter;
2611
2612 } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
2613 PCMK_ACTION_MIGRATE_FROM, NULL)) {
2614 *start_index = counter;
2615
 /* Monitors are considered only while no implied start has been
  * recorded after the most recent stop
  */
2616 } else if ((implied_monitor_start <= *stop_index)
2617 && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
2618 pcmk__str_casei)) {
2619 const char *rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE);
2620
2621 if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
2622 implied_monitor_start = counter;
2623 }
 /* NOTE(review): the line opening this branch is absent from the
  * listing; only the end of its condition is visible
  */
2625 PCMK_ACTION_DEMOTE, NULL)) {
2626 implied_clone_start = counter;
2627 }
2628 }
2629
 /* Without an explicit start, prefer the implied clone start over the
  * implied monitor start
  */
2630 if (*start_index == -1) {
2631 if (implied_clone_start != -1) {
2632 *start_index = implied_clone_start;
2633 } else if (implied_monitor_start != -1) {
2634 *start_index = implied_monitor_start;
2635 }
2636 }
2637}
2638
2639// If resource history entry has shutdown lock, remember lock node and time
/* If the lock has expired (scheduler->shutdown_lock is positive and the
 * compared time is past lock_time + shutdown_lock), the resource's history on
 * the node is cleared instead of the lock being recorded.
 *
 * NOTE(review): part of the signature and parts of both conditions are absent
 * from this listing; "node" and "scheduler" are used below, so they are
 * presumably declared on the missing signature line.
 */
2640static void
2641unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
2643{
2644 time_t lock_time = 0; // When lock started (i.e. node shutdown time)
2645
2647 &lock_time) == pcmk_ok) && (lock_time != 0)) {
2648
2649 if ((scheduler->shutdown_lock > 0)
2651 > (lock_time + scheduler->shutdown_lock))) {
2652 pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
2653 rsc->id, pcmk__node_name(node));
2654 pe__clear_resource_history(rsc, node);
2655 } else {
2656 /* @COMPAT I don't like breaking const signatures, but
2657 * rsc->lock_node should really be const -- we just can't change it
2658 * until the next API compatibility break.
2659 */
2660 rsc->lock_node = (pcmk_node_t *) node;
2661 rsc->lock_time = lock_time;
2662 }
2663 }
2664}
2665
/*!
 * \internal
 * \brief Unpack one resource's history entry (PCMK__XE_LRM_RESOURCE) on a node
 *
 * Collects and sorts the entry's PCMK__XE_LRM_RSC_OP children, finds the
 * resource they belong to (or treats it as an orphan), unpacks each operation,
 * creates optional recurring actions, and processes the resulting state.
 *
 * \param[in,out] node          Node whose history is being unpacked
 * \param[in]     lrm_resource  History entry XML for one resource
 *
 * \return Resource that the entry belongs to, or NULL if the entry has no ID
 *         or there is nothing to do for it
 *
 * \note NOTE(review): several source lines are absent from this listing,
 *       including the end of the signature ("scheduler" is used below) and
 *       the declaration of "on_fail".
 */
2676static pcmk_resource_t *
2677unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
2679{
2680 GList *gIter = NULL;
2681 int stop_index = -1;
2682 int start_index = -1;
2683 enum rsc_role_e req_role = pcmk_role_unknown;
2684
2685 const char *rsc_id = pcmk__xe_id(lrm_resource);
2686
2687 pcmk_resource_t *rsc = NULL;
2688 GList *op_list = NULL;
2689 GList *sorted_op_list = NULL;
2690
2691 xmlNode *rsc_op = NULL;
2692 xmlNode *last_failure = NULL;
2693
2695 enum rsc_role_e saved_role = pcmk_role_unknown;
2696
2697 if (rsc_id == NULL) {
2698 pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE
2699 " entry: No " PCMK_XA_ID);
2700 crm_log_xml_info(lrm_resource, "missing-id");
2701 return NULL;
2702 }
2703 crm_trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s",
2704 rsc_id, pcmk__node_name(node));
2705
2706 /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort
2707 * them
2708 */
2709 for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL,
2710 NULL);
2711 rsc_op != NULL; rsc_op = pcmk__xe_next_same(rsc_op)) {
2712
2713 op_list = g_list_prepend(op_list, rsc_op);
2714 }
2715
 /* NOTE(review): the condition guarding this early return is absent from
  * the listing
  */
2717 if (op_list == NULL) {
2718 // If there are no operations, there is nothing to do
2719 return NULL;
2720 }
2721 }
2722
2723 /* find the resource */
2724 rsc = unpack_find_resource(scheduler, node, rsc_id);
2725 if (rsc == NULL) {
2726 if (op_list == NULL) {
2727 // If there are no operations, there is nothing to do
2728 return NULL;
2729 } else {
2730 rsc = process_orphan_resource(lrm_resource, node, scheduler);
2731 }
2732 }
2733 CRM_ASSERT(rsc != NULL);
2734
2735 // Check whether the resource is "shutdown-locked" to this node
2737 unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
2738 }
2739
2740 /* process operations */
 /* The role is reset so that it reflects only this node's history; the
  * previous value is restored at the end if it is higher
  */
2741 saved_role = rsc->role;
2742 rsc->role = pcmk_role_unknown;
2743 sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2744
2745 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
 /* (this declaration shadows the function-scope rsc_op) */
2746 xmlNode *rsc_op = (xmlNode *) gIter->data;
2747
2748 unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2749 }
2750
2751 /* create active recurring operations as optional */
2752 calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2753 process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2754 scheduler);
2755
2756 /* no need to free the contents */
2757 g_list_free(sorted_op_list);
2758
2759 process_rsc_state(rsc, node, on_fail);
2760
 /* A requested target role is considered only if no next role has been
  * calculated or the requested role is lower than it (the statement executed
  * in that case is absent from the listing); a higher requested role is
  * only logged
  */
2761 if (get_target_role(rsc, &req_role)) {
2762 if ((rsc->next_role == pcmk_role_unknown)
2763 || (req_role < rsc->next_role)) {
2764
2766
2767 } else if (req_role > rsc->next_role) {
2768 pcmk__rsc_info(rsc,
2769 "%s: Not overwriting calculated next role %s"
2770 " with requested next role %s",
2771 rsc->id, pcmk_role_text(rsc->next_role),
2772 pcmk_role_text(req_role));
2773 }
2774 }
2775
 /* Keep the higher of the role from before unpacking and the new role */
2776 if (saved_role > rsc->role) {
2777 rsc->role = saved_role;
2778 }
2779
2780 return rsc;
2781}
2782
/*!
 * \internal
 * \brief Link resources to their containers based on history entries
 *
 * For each PCMK__XE_LRM_RESOURCE child that has both a PCMK__META_CONTAINER
 * attribute and an ID, look up the container and the resource in
 * scheduler->resources. When both are found and the resource passes the
 * filter below, set the resource's container and append the resource to the
 * container's list of fillers.
 *
 * \param[in] lrm_rsc_list  XML element whose children are the history entries
 *
 * \note NOTE(review): the end of the signature ("scheduler" is used below)
 *       and part of the filter condition are absent from this listing.
 */
2783static void
2784handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list,
2786{
2787 for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list, NULL,
2788 NULL, NULL);
2789 rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) {
2790
2791 pcmk_resource_t *rsc;
2792 pcmk_resource_t *container;
2793 const char *rsc_id;
2794 const char *container_id;
2795
 /* All children are iterated, so skip any other element types */
2796 if (!pcmk__xe_is(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2797 continue;
2798 }
2799
2800 container_id = crm_element_value(rsc_entry, PCMK__META_CONTAINER);
2801 rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2802 if (container_id == NULL || rsc_id == NULL) {
2803 continue;
2804 }
2805
2806 container = pe_find_resource(scheduler->resources, container_id);
2807 if (container == NULL) {
2808 continue;
2809 }
2810
 /* Skip resources that are unknown or already have a container (one
  * further condition is absent from the listing)
  */
2811 rsc = pe_find_resource(scheduler->resources, rsc_id);
2812 if ((rsc == NULL) || (rsc->container != NULL)
2814 continue;
2815 }
2816
2817 pcmk__rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
2818 rsc->id, container_id);
2819 rsc->container = container;
2820 container->fillers = g_list_append(container->fillers, rsc);
2821 }
2822}
2823
/*!
 * \internal
 * \brief Unpack all resource history entries found under a node's XML
 *
 * Descends from the given XML to its PCMK__XE_LRM child and then to that
 * element's PCMK__XE_LRM_RESOURCES child, returning quietly if either is
 * missing. Each entry found there is passed to unpack_lrm_resource().
 *
 * \param[in,out] node  Node whose history is being unpacked
 * \param[in]     xml   XML element containing the PCMK__XE_LRM child
 *
 * \note NOTE(review): the end of the signature ("scheduler" is used below),
 *       part of the loop initializer, and part of the condition that sets
 *       found_orphaned_container_filler are absent from this listing.
 */
2832static void
2833unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
2835{
2836 bool found_orphaned_container_filler = false;
2837
2838 // Drill down to PCMK__XE_LRM_RESOURCES section
2839 xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL);
2840 if (xml == NULL) {
2841 return;
2842 }
2843 xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
2844 if (xml == NULL) {
2845 return;
2846 }
2847
2848 // Unpack each PCMK__XE_LRM_RESOURCE entry
2849 for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml,
2851 NULL, NULL);
2852 rsc_entry != NULL; rsc_entry = pcmk__xe_next_same(rsc_entry)) {
2853
2854 pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
2855
2856 if ((rsc != NULL)
2858 found_orphaned_container_filler = true;
2859 }
2860 }
2861
2862 /* Now that all resource state has been unpacked for this node, map any
2863 * orphaned container fillers to their container resource.
2864 */
 /* At this point xml refers to the PCMK__XE_LRM_RESOURCES element */
2865 if (found_orphaned_container_filler) {
2866 handle_orphaned_container_fillers(xml, scheduler);
2867 }
2868}
2869
/*!
 * \internal
 * \brief Set a resource's role to reflect that it is active
 *
 * The role is set to pcmk_role_started unless the resource's top-level
 * resource is promotable.
 *
 * \param[in,out] rsc  Resource to update
 *
 * \note NOTE(review): the statement executed for promotable resources is
 *       absent from this listing.
 */
2870static void
2871set_active(pcmk_resource_t *rsc)
2872{
2873 const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
2874
2875 if (top && pcmk_is_set(top->flags, pcmk_rsc_promotable)) {
2877 } else {
2878 rsc->role = pcmk_role_started;
2879 }
2880}
2881
2882static void
2883set_node_score(gpointer key, gpointer value, gpointer user_data)
2884{
2885 pcmk_node_t *node = value;
2886 int *score = user_data;
2887
2888 node->weight = *score;
2889}
2890
/* XPath fragments used to search history. XPATH_NODE_STATE is an absolute path
 * to node state elements; the SUB_XPATH_* fragments are relative paths meant
 * to be appended (after any attribute predicates) to reach a node's resource
 * history entries and their operation entries.
 */
2891#define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
2892 "/" PCMK__XE_NODE_STATE
2893#define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM \
2894 "/" PCMK__XE_LRM_RESOURCES \
2895 "/" PCMK__XE_LRM_RESOURCE
2896#define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP
2897
/*!
 * \internal
 * \brief Find an operation history entry for a resource on a node
 *
 * Builds an XPath expression that selects the entry by node name, resource ID,
 * and operation name, and searches scheduler->input with it.
 *
 * \param[in] resource   ID of the resource (must not be NULL)
 * \param[in] op         Name of the operation (must not be NULL)
 * \param[in] node       Name of the node whose history is searched (must not
 *                       be NULL)
 * \param[in] source     If not NULL, the name of the other node involved in a
 *                       migration: for migrate_to it is matched against the
 *                       entry's PCMK__META_MIGRATE_TARGET attribute, and for
 *                       migrate_from against PCMK__META_MIGRATE_SOURCE. It is
 *                       ignored for any other operation.
 * \param[in] target_rc  If nonnegative, the entry is returned only if rc
 *                       equals this value and status equals PCMK_EXEC_DONE
 * \param[in] scheduler  Scheduler data
 *
 * \return Matching history entry, or NULL if none is found (or the result
 *         check fails)
 *
 * \note NOTE(review): the lines that assign rc and status before they are
 *       compared are absent from this listing.
 */
2898static xmlNode *
2899find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2900 int target_rc, pcmk_scheduler_t *scheduler)
2901{
2902 GString *xpath = NULL;
2903 xmlNode *xml = NULL;
2904
2905 CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
2906 return NULL);
2907
 /* The predicate on the operation entry is left open here and is closed by
  * one of the branches below
  */
2908 xpath = g_string_sized_new(256);
2909 pcmk__g_strcat(xpath,
2910 XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']"
2911 SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']"
2912 SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'",
2913 NULL);
2914
2915 /* Need to check against transition_magic too? */
2916 if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
2917 pcmk__g_strcat(xpath,
2918 " and @" PCMK__META_MIGRATE_TARGET "='", source, "']",
2919 NULL);
2920
2921 } else if ((source != NULL)
2922 && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
2923 pcmk__g_strcat(xpath,
2924 " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']",
2925 NULL);
2926 } else {
2927 g_string_append_c(xpath, ']');
2928 }
2929
2930 xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2931 LOG_DEBUG);
2932 g_string_free(xpath, TRUE);
2933
2934 if (xml && target_rc >= 0) {
2935 int rc = PCMK_OCF_UNKNOWN_ERROR;
2936 int status = PCMK_EXEC_ERROR;
2937
2940 if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
2941 return NULL;
2942 }
2943 }
2944 return xml;
2945}
2946
/*!
 * \internal
 * \brief Find a resource's history entry on a node
 *
 * Builds an XPath expression that selects the PCMK__XE_LRM_RESOURCE entry by
 * node name and resource ID, and searches scheduler->input with it.
 *
 * \param[in] rsc_id     ID of the resource (must not be NULL)
 * \param[in] node_name  Name of the node (must not be NULL)
 *
 * \return Result of the XPath search via get_xpath_object(), or NULL if an
 *         argument check fails
 *
 * \note NOTE(review): the end of the signature is absent from this listing;
 *       "scheduler" is used below.
 */
2947static xmlNode *
2948find_lrm_resource(const char *rsc_id, const char *node_name,
2950{
2951 GString *xpath = NULL;
2952 xmlNode *xml = NULL;
2953
2954 CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
2955
2956 xpath = g_string_sized_new(256);
2957 pcmk__g_strcat(xpath,
2958 XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']"
2959 SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']",
2960 NULL);
2961
2962 xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2963 LOG_DEBUG);
2964
2965 g_string_free(xpath, TRUE);
2966 return xml;
2967}
2968
/*!
 * \internal
 * \brief Check whether a resource's state is unknown on a node
 *
 * Searches rsc->cluster->input with an XPath expression built from the node
 * name, the resource ID, and a predicate requiring a result code other than
 * PCMK_OCF_UNKNOWN.
 *
 * \param[in] rsc        Resource to check
 * \param[in] node_name  Name of the node to check
 *
 * \return true if the search has no results, otherwise false
 *
 * \note NOTE(review): the lines that begin building the XPath expression are
 *       absent from this listing; only the end of the format string and its
 *       arguments are visible.
 */
2978static bool
2979unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
2980{
2981 bool result = false;
2982 xmlXPathObjectPtr search;
2983 char *xpath = NULL;
2984
2988 "[@" PCMK__XA_RC_CODE "!='%d']",
2989 node_name, rsc->id, PCMK_OCF_UNKNOWN);
2990
2991 search = xpath_search(rsc->cluster->input, xpath);
2992 result = (numXpathResults(search) == 0);
2993 freeXpathObject(search);
2994 free(xpath);
2995 return result;
2996}
2997
/*!
 * \internal
 * \brief Check for a monitor entry that is newer than a given history entry
 *
 * \param[in] rsc_id     ID of the resource
 * \param[in] node_name  Name of the node whose history is searched
 * \param[in] xml_op     History entry to compare against
 * \param[in] same_node  Passed through to pe__is_newer_op()
 *
 * \return true if find_lrm_op() returns a monitor entry and
 *         pe__is_newer_op() reports it as newer than \p xml_op
 *
 * \note NOTE(review): the end of the signature and the remaining
 *       find_lrm_op() arguments (which determine the result the monitor must
 *       have) are absent from this listing.
 */
3010static bool
3011monitor_not_running_after(const char *rsc_id, const char *node_name,
3012 const xmlNode *xml_op, bool same_node,
3014{
3015 /* Any probe/monitor operation on the node indicating it was not running
3016 * there
3017 */
3018 xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
3020
3021 return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0);
3022}
3023
/*!
 * \internal
 * \brief Check a resource's history on a node for a qualifying operation that
 *        is newer than a given history entry
 *
 * \param[in] rsc_id     ID of the resource
 * \param[in] node_name  Name of the node whose history is searched
 * \param[in] xml_op     History entry to compare against (it is itself
 *                       skipped if encountered)
 * \param[in] same_node  Passed through to pe__is_newer_op()
 *
 * \return true if a qualifying newer entry exists; false if there is none or
 *         the resource has no history entry on the node
 *
 * \note NOTE(review): the end of the signature ("scheduler" is used below),
 *       the assignment of "task", and the start of the condition that decides
 *       which operations qualify are absent from this listing.
 */
3036static bool
3037non_monitor_after(const char *rsc_id, const char *node_name,
3038 const xmlNode *xml_op, bool same_node,
3040{
3041 xmlNode *lrm_resource = NULL;
3042
3043 lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
3044 if (lrm_resource == NULL) {
3045 return false;
3046 }
3047
3048 for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP,
3049 NULL, NULL);
3050 op != NULL; op = pcmk__xe_next_same(op)) {
3051
3052 const char * task = NULL;
3053
 /* An entry cannot be newer than itself */
3054 if (op == xml_op) {
3055 continue;
3056 }
3057
3059
3062 NULL)
3063 && pe__is_newer_op(op, xml_op, same_node) > 0) {
3064 return true;
3065 }
3066 }
3067
3068 return false;
3069}
3070
/*!
 * \internal
 * \brief Check whether a node has resource state newer than a migration
 *
 * Chooses which migration entry to compare against (preferring the one that
 * was recorded on \p node_name itself), then reports whether either
 * non_monitor_after() or monitor_not_running_after() finds something newer.
 *
 * \param[in] rsc_id        ID of the resource
 * \param[in] node_name     Name of the node to check
 * \param[in] migrate_to    migrate_to history entry, if any
 * \param[in] migrate_from  migrate_from history entry, if any
 *
 * \return true if either helper reports newer state, otherwise false
 *
 * \note NOTE(review): the end of the signature ("scheduler" is used below)
 *       and the lines that assign "source" and "target" are absent from this
 *       listing.
 */
3083static bool
3084newer_state_after_migrate(const char *rsc_id, const char *node_name,
3085 const xmlNode *migrate_to,
3086 const xmlNode *migrate_from,
3088{
3089 const xmlNode *xml_op = migrate_to;
3090 const char *source = NULL;
3091 const char *target = NULL;
3092 bool same_node = false;
3093
 /* Default to migrate_from when it exists, otherwise migrate_to */
3094 if (migrate_from) {
3095 xml_op = migrate_from;
3096 }
3097
3100
3101 /* It's preferred to compare to the migrate event on the same node if
3102 * existing, since call ids are more reliable.
3103 */
 /* same_node is set only when the chosen entry was recorded on node_name */
3104 if (pcmk__str_eq(node_name, target, pcmk__str_casei)) {
3105 if (migrate_from) {
3106 xml_op = migrate_from;
3107 same_node = true;
3108
3109 } else {
3110 xml_op = migrate_to;
3111 }
3112
3113 } else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) {
3114 if (migrate_to) {
3115 xml_op = migrate_to;
3116 same_node = true;
3117
3118 } else {
3119 xml_op = migrate_from;
3120 }
3121 }
3122
3123 /* If there's any newer non-monitor operation on the node, or any newer
3124 * probe/monitor operation on the node indicating it was not running there,
3125 * the migration events potentially no longer matter for the node.
3126 */
3127 return non_monitor_after(rsc_id, node_name, xml_op, same_node, scheduler)
3128 || monitor_not_running_after(rsc_id, node_name, xml_op, same_node,
3129 scheduler);
3130}
3131
/*!
 * \internal
 * \brief Get the source and target node names from a migration history entry
 *
 * \param[in]  entry        History entry XML for a migration action
 * \param[in]  source_node  If not NULL, node that the entry's source name
 *                          must match
 * \param[in]  target_node  If not NULL, node that the entry's target name
 *                          must match
 * \param[out] source_name  Where to store the entry's
 *                          PCMK__META_MIGRATE_SOURCE value
 * \param[out] target_name  Where to store the entry's
 *                          PCMK__META_MIGRATE_TARGET value
 *
 * \return pcmk_rc_ok on success, or pcmk_rc_unpack_error (after logging a
 *         configuration error) if either name is missing or fails to match
 *
 * \note The output pointers are set even when an error is returned.
 * \note NOTE(review): the comparison flags of both name checks, and part of
 *       the first error message, are absent from this listing.
 */
3144static int
3145get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
3146 const pcmk_node_t *target_node,
3147 const char **source_name, const char **target_name)
3148{
3149 *source_name = crm_element_value(entry, PCMK__META_MIGRATE_SOURCE);
3150 *target_name = crm_element_value(entry, PCMK__META_MIGRATE_TARGET);
3151 if ((*source_name == NULL) || (*target_name == NULL)) {
3152 pcmk__config_err("Ignoring resource history entry %s without "
3154 PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry));
3155 return pcmk_rc_unpack_error;
3156 }
3157
3158 if ((source_node != NULL)
3159 && !pcmk__str_eq(*source_name, source_node->details->uname,
3161 pcmk__config_err("Ignoring resource history entry %s because "
3162 PCMK__META_MIGRATE_SOURCE "='%s' does not match %s",
3163 pcmk__xe_id(entry), *source_name,
3164 pcmk__node_name(source_node));
3165 return pcmk_rc_unpack_error;
3166 }
3167
3168 if ((target_node != NULL)
3169 && !pcmk__str_eq(*target_name, target_node->details->uname,
3171 pcmk__config_err("Ignoring resource history entry %s because "
3172 PCMK__META_MIGRATE_TARGET "='%s' does not match %s",
3173 pcmk__xe_id(entry), *target_name,
3174 pcmk__node_name(target_node));
3175 return pcmk_rc_unpack_error;
3176 }
3177
3178 return pcmk_rc_ok;
3179}
3180
3181/*
3182 * \internal
3183 * \brief Add a migration source to a resource's list of dangling migrations
3184 *
3185 * If the migrate_to and migrate_from actions in a live migration both
3186 * succeeded, but there is no stop on the source, the migration is considered
3187 * "dangling." Add the source to the resource's dangling migration list, which
3188 * will be used to schedule a stop on the source without affecting the target.
3189 *
3190 * \param[in,out] rsc Resource involved in migration
3191 * \param[in] node Migration source
3192 */
3193static void
3194add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3195{
3196 pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3197 rsc->id, pcmk__node_name(node));
3198 rsc->role = pcmk_role_stopped;
3199 rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations,
3200 (gpointer) node);
3201}
3202
/*!
 * \internal
 * \brief Update resource state based on a successful migrate_to history entry
 *
 * Depending on what newer history exists on the source and target nodes, and
 * on the result of any matching migrate_from, this either does nothing,
 * records a dangling migration, or marks the resource as started and possibly
 * running on the target (recording a partial migration when both nodes are
 * online).
 *
 * \param[in,out] history  Parsed history entry for the migrate_to action
 *                         (history->node is the migration source)
 *
 * \note NOTE(review): the flag arguments of both pcmk__set_rsc_flags() calls
 *       are absent from this listing.
 */
3209static void
3210unpack_migrate_to_success(struct action_history *history)
3211{
3212 /* A complete migration sequence is:
3213 * 1. migrate_to on source node (which succeeded if we get to this function)
3214 * 2. migrate_from on target node
3215 * 3. stop on source node
3216 *
3217 * If no migrate_from has happened, the migration is considered to be
3218 * "partial". If the migrate_from succeeded but no stop has happened, the
3219 * migration is considered to be "dangling".
3220 *
3221 * If a successful migrate_to and stop have happened on the source node, we
3222 * still need to check for a partial migration, due to scenarios (easier to
3223 * produce with batch-limit=1) like:
3224 *
3225 * - A resource is migrating from node1 to node2, and a migrate_to is
3226 * initiated for it on node1.
3227 *
3228 * - node2 goes into standby mode while the migrate_to is pending, which
3229 * aborts the transition.
3230 *
3231 * - Upon completion of the migrate_to, a new transition schedules a stop
3232 * on both nodes and a start on node1.
3233 *
3234 * - If the new transition is aborted for any reason while the resource is
3235 * stopping on node1, the transition after that stop completes will see
3236 * the migrate_to and stop on the source, but it's still a partial
3237 * migration, and the resource must be stopped on node2 because it is
3238 * potentially active there due to the migrate_to.
3239 *
3240 * We also need to take into account that either node's history may be
3241 * cleared at any point in the migration process.
3242 */
 /* These defaults stand for "no migrate_from found": a status of pending
  * with an OK result code
  */
3243 int from_rc = PCMK_OCF_OK;
3244 int from_status = PCMK_EXEC_PENDING;
3245 pcmk_node_t *target_node = NULL;
3246 xmlNode *migrate_from = NULL;
3247 const char *source = NULL;
3248 const char *target = NULL;
3249 bool source_newer_op = false;
3250 bool target_newer_state = false;
3251 bool active_on_target = false;
3252
3253 // Get source and target node names from XML
3254 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3255 &target) != pcmk_rc_ok) {
3256 return;
3257 }
3258
3259 // Check for newer state on the source
3260 source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3261 true, history->rsc->cluster);
3262
3263 // Check for a migrate_from action from this source on the target
 // (a target_rc of -1 means the entry is returned whatever its result)
3264 migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
3265 target, source, -1, history->rsc->cluster);
3266 if (migrate_from != NULL) {
3267 if (source_newer_op) {
3268 /* There's a newer non-monitor operation on the source and a
3269 * migrate_from on the target, so this migrate_to is irrelevant to
3270 * the resource's state.
3271 */
3272 return;
3273 }
3274 crm_element_value_int(migrate_from, PCMK__XA_RC_CODE, &from_rc);
3275 crm_element_value_int(migrate_from, PCMK__XA_OP_STATUS, &from_status);
3276 }
3277
3278 /* If the resource has newer state on both the source and target after the
3279 * migration events, this migrate_to is irrelevant to the resource's state.
3280 */
3281 target_newer_state = newer_state_after_migrate(history->rsc->id, target,
3282 history->xml, migrate_from,
3283 history->rsc->cluster);
3284 if (source_newer_op && target_newer_state) {
3285 return;
3286 }
3287
3288 /* Check for dangling migration (migrate_from succeeded but stop not done).
3289 * We know there's no stop because we already returned if the target has a
3290 * migrate_from and the source has any newer non-monitor operation.
3291 */
3292 if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
3293 add_dangling_migration(history->rsc, history->node);
3294 return;
3295 }
3296
3297 /* Without newer state, this migrate_to implies the resource is active.
3298 * (Clones are not allowed to migrate, so role can't be promoted.)
3299 */
3300 history->rsc->role = pcmk_role_started;
3301
3302 target_node = pcmk_find_node(history->rsc->cluster, target);
3303 active_on_target = !target_newer_state && (target_node != NULL)
3304 && target_node->details->online;
3305
3306 if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
3307 if (active_on_target) {
3308 native_add_running(history->rsc, target_node, history->rsc->cluster,
3309 TRUE);
3310 } else {
3311 // Mark resource as failed, require recovery, and prevent migration
3312 pcmk__set_rsc_flags(history->rsc,
3315 }
3316 return;
3317 }
3318
3319 // The migrate_from is pending, complete but erased, or to be scheduled
3320
3321 /* If there is no history at all for the resource on an online target, then
3322 * it was likely cleaned. Just return, and we'll schedule a probe. Once we
3323 * have the probe result, it will be reflected in target_newer_state.
3324 */
3325 if ((target_node != NULL) && target_node->details->online
3326 && unknown_on_node(history->rsc, target)) {
3327 return;
3328 }
3329
3330 if (active_on_target) {
3331 pcmk_node_t *source_node = pcmk_find_node(history->rsc->cluster,
3332 source);
3333
3334 native_add_running(history->rsc, target_node, history->rsc->cluster,
3335 FALSE);
3336 if ((source_node != NULL) && source_node->details->online) {
3337 /* This is a partial migration: the migrate_to completed
3338 * successfully on the source, but the migrate_from has not
3339 * completed. Remember the source and target; if the newly
3340 * chosen target remains the same when we schedule actions
3341 * later, we may continue with the migration.
3342 */
3343 history->rsc->partial_migration_target = target_node;
3344 history->rsc->partial_migration_source = source_node;
3345 }
3346
3347 } else if (!source_newer_op) {
3348 // Mark resource as failed, require recovery, and prevent migration
3349 pcmk__set_rsc_flags(history->rsc,
3352 }
3353}
3354
/*!
 * \internal
 * \brief Update resource state based on a failed migrate_to history entry
 *
 * The resource's role is set to started. Then, if the resource's state is
 * known on the target and the target has no newer state, the resource is added
 * as running on the target (if the target is found and online). Otherwise, if
 * the source has no newer non-monitor operation, the source node is recorded
 * as a dangling migration.
 *
 * \param[in,out] history  Parsed history entry for the migrate_to action
 *                         (history->node is the migration source)
 *
 * \note NOTE(review): the middle arguments of the find_lrm_op() call are
 *       absent from this listing.
 */
3361static void
3362unpack_migrate_to_failure(struct action_history *history)
3363{
3364 xmlNode *target_migrate_from = NULL;
3365 const char *source = NULL;
3366 const char *target = NULL;
3367
3368 // Get source and target node names from XML
3369 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3370 &target) != pcmk_rc_ok) {
3371 return;
3372 }
3373
3374 /* If a migration failed, we have to assume the resource is active. Clones
3375 * are not allowed to migrate, so role can't be promoted.
3376 */
3377 history->rsc->role = pcmk_role_started;
3378
3379 // Check for migrate_from on the target
3380 target_migrate_from = find_lrm_op(history->rsc->id,
3382 PCMK_OCF_OK, history->rsc->cluster);
3383
3384 if (/* If the resource state is unknown on the target, it will likely be
3385 * probed there.
3386 * Don't just consider it running there. We will get back here anyway in
3387 * case the probe detects it's running there.
3388 */
3389 !unknown_on_node(history->rsc, target)
3390 /* If the resource has newer state on the target after the migration
3391 * events, this migrate_to no longer matters for the target.
3392 */
3393 && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3394 target_migrate_from,
3395 history->rsc->cluster)) {
3396 /* The resource has no newer state on the target, so assume it's still
3397 * active there.
3398 * (if it is up).
3399 */
3400 pcmk_node_t *target_node = pcmk_find_node(history->rsc->cluster,
3401 target);
3402
3403 if (target_node && target_node->details->online) {
3404 native_add_running(history->rsc, target_node, history->rsc->cluster,
3405 FALSE);
3406 }
3407
3408 } else if (!non_monitor_after(history->rsc->id, source, history->xml, true,
3409 history->rsc->cluster)) {
3410 /* We know the resource has newer state on the target, but this
3411 * migrate_to still matters for the source as long as there's no newer
3412 * non-monitor operation there.
3413 */
3414
3415 // Mark node as having dangling migration so we can force a stop later
 // (unlike add_dangling_migration(), this leaves the role as started)
3416 history->rsc->dangling_migrations =
3417 g_list_prepend(history->rsc->dangling_migrations,
3418 (gpointer) history->node);
3419 }
3420}
3421
3428static void
3429unpack_migrate_from_failure(struct action_history *history)
3430{
3431 xmlNode *source_migrate_to = NULL;
3432 const char *source = NULL;
3433 const char *target = NULL;
3434
3435 // Get source and target node names from XML
3436 if (get_migration_node_names(history->xml, NULL, history->node, &source,
3437 &target) != pcmk_rc_ok) {
3438 return;
3439 }
3440
3441 /* If a migration failed, we have to assume the resource is active. Clones
3442 * are not allowed to migrate, so role can't be promoted.
3443 */
3444 history->rsc->role = pcmk_role_started;
3445
3446 // Check for a migrate_to on the source
3447 source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3448 source, target, PCMK_OCF_OK,
3449 history->rsc->cluster);
3450
3451 if (/* If the resource state is unknown on the source, it will likely be
3452 * probed there.
3453 * Don't just consider it running there. We will get back here anyway in
3454 * case the probe detects it's running there.
3455 */
3456 !unknown_on_node(history->rsc, source)
3457 /* If the resource has newer state on the source after the migration
3458 * events, this migrate_from no longer matters for the source.
3459 */
3460 && !newer_state_after_migrate(history->rsc->id, source,
3461 source_migrate_to, history->xml,
3462 history->rsc->cluster)) {
3463 /* The resource has no newer state on the source, so assume it's still
3464 * active there (if it is up).
3465 */
3466 pcmk_node_t *source_node = pcmk_find_node(history->rsc->cluster,
3467 source);
3468
3469 if (source_node && source_node->details->online) {
3470 native_add_running(history->rsc, source_node, history->rsc->cluster,
3471 TRUE);
3472 }
3473 }
3474}
3475
3482static void
3483record_failed_op(struct action_history *history)
3484{
3485 if (!(history->node->details->online)) {
3486 return;
3487 }
3488
3489 for (const xmlNode *xIter = history->rsc->cluster->failed->children;
3490 xIter != NULL; xIter = xIter->next) {
3491
3492 const char *key = pcmk__xe_history_key(xIter);
3493 const char *uname = crm_element_value(xIter, PCMK_XA_UNAME);
3494
3495 if (pcmk__str_eq(history->key, key, pcmk__str_none)
3496 && pcmk__str_eq(uname, history->node->details->uname,
3497 pcmk__str_casei)) {
3498 crm_trace("Skipping duplicate entry %s on %s",
3499 history->key, pcmk__node_name(history->node));
3500 return;
3501 }
3502 }
3503
3504 crm_trace("Adding entry for %s on %s to failed action list",
3505 history->key, pcmk__node_name(history->node));
3506 crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname);
3507 crm_xml_add(history->xml, PCMK__XA_RSC_ID, history->rsc->id);
3508 pcmk__xml_copy(history->rsc->cluster->failed, history->xml);
3509}
3510
/*!
 * \internal
 * \brief Get a printable time string for a history entry
 *
 * \param[in] xml_op  History entry XML
 *
 * \return Newly allocated string; "unknown_time" if \c result was not set
 *         from the entry's timestamp
 * \note Callers in this file release the returned string with free().
 *
 * NOTE(review): this listing appears to have dropped two lines of this
 * function: the opening of the first "if" (only its continuation "&when) ..."
 * is visible, so the attribute being read cannot be confirmed from here), and
 * the statement inside the "Skip day of week" branch that presumably assigns
 * \c result. Restore them from the upstream source before building.
 */
3511static char *
3512last_change_str(const xmlNode *xml_op)
3513{
3514 time_t when;
3515 char *result = NULL;
3516
    // NOTE(review): start of this condition is not visible in this listing
3518 &when) == pcmk_ok) {
3519 char *when_s = pcmk__epoch2str(&when, 0);
3520 const char *p = strchr(when_s, ' ');
3521
3522 // Skip day of week to make message shorter
    // (p is advanced past the first space; the branch body is not visible)
3523 if ((p != NULL) && (*(++p) != '\0')) {
3525 }
3526 free(when_s);
3527 }
3528
    // Fall back to a fixed placeholder when no time string was produced
3529 if (result == NULL) {
3530 result = pcmk__str_copy("unknown_time");
3531 }
3532
3533 return result;
3534}
3535
/*!
 * \internal
 * \brief Compare two action failure responses
 *
 * Some combinations are special-cased by the nested switches (first keyed on
 * \p first, then on \p second); any combination not handled there falls back
 * to the numeric difference of the enum values.
 *
 * \param[in] first   First failure response to compare
 * \param[in] second  Second failure response to compare
 *
 * \return Negative, zero, or positive integer; the one caller visible in this
 *         file replaces its current response with the configured one when
 *         cmp_on_fail(current, configured) is negative, so presumably the sign
 *         reflects relative severity -- TODO confirm
 *
 * NOTE(review): every "case" label of the switches below is missing from this
 * listing (only the return statements and "default" labels are visible), so
 * the exact ordering cannot be read from here. Restore the labels from the
 * upstream source before building.
 */
3548static int
3549cmp_on_fail(enum action_fail_response first, enum action_fail_response second)
3550{
    // Special cases keyed on the first response
3551 switch (first) {
3553 switch (second) {
3555 return 1;
3557 return 0;
3558 default:
3559 return -1;
3560 }
3561 break;
3562
3564 switch (second) {
3568 return 1;
3570 return 0;
3571 default:
3572 return -1;
3573 }
3574 break;
3575
3577 switch (second) {
3582 return 1;
3584 return 0;
3585 default:
3586 return -1;
3587 }
3588 break;
3589
3590 default:
3591 break;
3592 }
    // Special cases keyed on the second response (mirror of the above)
3593 switch (second) {
3595 return (first == pcmk_on_fail_ignore)? -1 : 1;
3596
3598 switch (first) {
3602 return -1;
3603 default:
3604 return 1;
3605 }
3606 break;
3607
3609 switch (first) {
3614 return -1;
3615 default:
3616 return 1;
3617 }
3618 break;
3619
3620 default:
3621 break;
3622 }
    // Neither value needed special handling: compare the raw enum values
3623 return first - second;
3624}
3625
/*!
 * \internal
 * \brief Ban a resource (or its entire anonymous clone) from all nodes
 *
 * If the resource has a parent and its topmost ancestor is an anonymous clone,
 * the ban is applied to that ancestor instead of the resource itself.
 *
 * \param[in,out] rsc  Resource to ban
 *
 * NOTE(review): the listing appears to have dropped the line between the
 * g_hash_table_destroy() block and g_hash_table_foreach(). As shown,
 * allowed_nodes would be used right after being destroyed; presumably the
 * missing line re-creates the table. Restore it from the upstream source.
 */
3632static void
3633ban_from_all_nodes(pcmk_resource_t *rsc)
3634{
3635 int score = -PCMK_SCORE_INFINITY;
3636 pcmk_resource_t *fail_rsc = rsc;
3637
3638 if (fail_rsc->parent != NULL) {
3639 pcmk_resource_t *parent = uber_parent(fail_rsc);
3640
3641 if (pcmk__is_anonymous_clone(parent)) {
3642 /* For anonymous clones, if an operation with
3643 * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the
3644 * entire clone must stop.
3645 */
3646 fail_rsc = parent;
3647 }
3648 }
3649
3650 // Ban the resource from all nodes
3651 crm_notice("%s will not be started under current conditions", fail_rsc->id);
3652 if (fail_rsc->allowed_nodes != NULL) {
3653 g_hash_table_destroy(fail_rsc->allowed_nodes);
3654 }
    // Apply the -INFINITY score to every entry of the allowed-nodes table
3656 g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
3657}
3658
3667static void
3668unpack_failure_handling(struct action_history *history,
3669 enum action_fail_response *on_fail,
3670 enum rsc_role_e *fail_role)
3671{
3672 xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3673 history->interval_ms, true);
3674
3675 GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3676 history->task,
3677 history->interval_ms, config);
3678
3679 const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL);
3680
3681 *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3682 history->interval_ms, on_fail_str);
3683 *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3684 meta);
3685 g_hash_table_destroy(meta);
3686}
3687
/*!
 * \internal
 * \brief Update resource state for a failed (unexpected-result) history entry
 *
 * Logs the failure, records it in the failed-action list (unless the
 * "not installed on an asymmetric cluster" case applies), raises *on_fail to
 * the configured response if cmp_on_fail() ranks it higher, applies
 * task-specific role and location handling, and finally applies the
 * post-failure role.
 *
 * \param[in,out] history         Parsed action history entry
 * \param[in]     config_on_fail  Failure response configured for the action
 * \param[in]     fail_role       Role the resource should have after failure
 * \param[out]    last_failure    Set to the XML of this history entry
 * \param[in,out] on_fail         Failure response tracked so far; replaced by
 *                                \p config_on_fail when
 *                                cmp_on_fail(*on_fail, config_on_fail) < 0
 *
 * NOTE(review): in the demote / on-fail=block branch, the opening of the call
 * whose last argument is "demote with ... =block" is missing from this
 * listing. Restore it from the upstream source before building.
 */
3698static void
3699unpack_rsc_op_failure(struct action_history *history,
3700 enum action_fail_response config_on_fail,
3701 enum rsc_role_e fail_role, xmlNode **last_failure,
3702 enum action_fail_response *on_fail)
3703{
3704 bool is_probe = false;
3705 char *last_change_s = NULL;
3706
3707 *last_failure = history->xml;
3708
3709 is_probe = pcmk_xe_is_probe(history->xml);
3710 last_change_s = last_change_str(history->xml);
3711
    /* "Not installed" results on a cluster without the symmetric flag are only
     * traced and are not added to the failed-action list; everything else is
     * warned about and recorded.
     */
3712 if (!pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_symmetric_cluster)
3713 && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3714 crm_trace("Unexpected result (%s%s%s) was recorded for "
3715 "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
3716 services_ocf_exitcode_str(history->exit_status),
3717 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3718 pcmk__s(history->exit_reason, ""),
3719 (is_probe? "probe" : history->task), history->rsc->id,
3720 pcmk__node_name(history->node), last_change_s,
3721 history->exit_status, history->id);
3722 } else {
3723 pcmk__sched_warn("Unexpected result (%s%s%s) was recorded for %s of "
3724 "%s on %s at %s " CRM_XS " exit-status=%d id=%s",
3725 services_ocf_exitcode_str(history->exit_status),
3726 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3727 pcmk__s(history->exit_reason, ""),
3728 (is_probe? "probe" : history->task), history->rsc->id,
3729 pcmk__node_name(history->node), last_change_s,
3730 history->exit_status, history->id);
3731
3732 if (is_probe && (history->exit_status != PCMK_OCF_OK)
3733 && (history->exit_status != PCMK_OCF_NOT_RUNNING)
3734 && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
3735
3736 /* A failed (not just unexpected) probe result could mean the user
3737 * didn't know resources will be probed even where they can't run.
3738 */
3739 crm_notice("If it is not possible for %s to run on %s, see "
3740 "the " PCMK_XA_RESOURCE_DISCOVERY " option for location "
3741 "constraints",
3742 history->rsc->id, pcmk__node_name(history->node));
3743 }
3744
3745 record_failed_op(history);
3746 }
3747
3748 free(last_change_s);
3749
    // Keep whichever failure response cmp_on_fail() ranks higher
3750 if (cmp_on_fail(*on_fail, config_on_fail) < 0) {
3751 pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
3752 pcmk_on_fail_text(*on_fail),
3753 pcmk_on_fail_text(config_on_fail), history->key);
3754 *on_fail = config_on_fail;
3755 }
3756
    // Task-specific handling of the failure
3757 if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
3758 resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY,
3759 "__stop_fail__", history->rsc->cluster);
3760
3761 } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
3762 unpack_migrate_to_failure(history);
3763
3764 } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
3765 unpack_migrate_from_failure(history);
3766
3767 } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
3768 history->rsc->role = pcmk_role_promoted;
3769
3770 } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
3771 if (config_on_fail == pcmk_on_fail_block) {
3772 history->rsc->role = pcmk_role_promoted;
            // NOTE(review): start of this call is not visible in this listing
3774 "demote with " PCMK_META_ON_FAIL "=block");
3775
3776 } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
3777 history->rsc->role = pcmk_role_stopped;
3778
3779 } else {
3780 /* Staying in the promoted role would put the scheduler and
3781 * controller into a loop. Setting the role to unpromoted is not
3782 * dangerous because the resource will be stopped as part of
3783 * recovery, and any promotion will be ordered after that stop.
3784 */
3785 history->rsc->role = pcmk_role_unpromoted;
3786 }
3787 }
3788
3789 if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3790 /* leave stopped */
3791 pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
3792 history->rsc->role = pcmk_role_stopped;
3793
3794 } else if (history->rsc->role < pcmk_role_started) {
3795 pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
3796 set_active(history->rsc);
3797 }
3798
3799 pcmk__rsc_trace(history->rsc,
3800 "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
3801 history->rsc->id, pcmk_role_text(history->rsc->role),
3802 pcmk__btoa(history->node->details->unclean),
3803 pcmk_on_fail_text(config_on_fail),
3804 pcmk_role_text(fail_role));
3805
    // Only raise next_role toward fail_role; "started" is never applied here
3806 if ((fail_role != pcmk_role_started)
3807 && (history->rsc->next_role < fail_role)) {
3808 pe__set_next_role(history->rsc, fail_role, "failure");
3809 }
3810
3811 if (fail_role == pcmk_role_stopped) {
3812 ban_from_all_nodes(history->rsc);
3813 }
3814}
3815
/*!
 * \internal
 * \brief Report a failed stop that cannot be recovered by fencing
 *
 * Returns without doing anything unless the action is a stop and
 * pe_can_fence() reports that the node cannot be fenced.
 *
 * \param[in,out] history  Parsed action history entry
 *
 * NOTE(review): as shown in this listing the function only logs an error. The
 * name suggests the resource is also blocked; the statements doing so are
 * presumably among lines dropped after the free() below -- confirm against
 * the upstream source.
 */
3825static void
3826block_if_unrecoverable(struct action_history *history)
3827{
3828 char *last_change_s = NULL;
3829
3830 if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3831 return; // All actions besides stop are always recoverable
3832 }
3833 if (pe_can_fence(history->node->details->data_set, history->node)) {
3834 return; // Failed stops are recoverable via fencing
3835 }
3836
3837 last_change_s = last_change_str(history->xml);
3838 pcmk__sched_err("No further recovery can be attempted for %s "
3839 "because %s on %s failed (%s%s%s) at %s "
3840 CRM_XS " rc=%d id=%s",
3841 history->rsc->id, history->task,
3842 pcmk__node_name(history->node),
3843 services_ocf_exitcode_str(history->exit_status),
3844 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3845 pcmk__s(history->exit_reason, ""),
3846 last_change_s, history->exit_status, history->id);
3847
3848 free(last_change_s);
3849
3852}
3853
3863static inline void
3864remap_because(struct action_history *history, const char **why, int value,
3865 const char *reason)
3866{
3867 if (history->execution_status != value) {
3868 history->execution_status = value;
3869 *why = reason;
3870 }
3871}
3872
/*!
 * \internal
 * \brief Remap an action result based on what was expected
 *
 * Degraded exit statuses are first mapped to their effective values, and
 * maskable probe failures are turned into "done / not running". For execution
 * statuses of done or error, the execution status is then recomputed by
 * comparing the exit status against the expected one, with further adjustments
 * per exit status. Any remapping is traced at the end along with its reason.
 *
 * \param[in,out] history  Parsed action history entry (execution_status and
 *                         exit_status may be changed; the resource's role may
 *                         be changed as well)
 * \param[in,out] on_fail  Failure response; set to pcmk_on_fail_ignore in the
 *                         branch that treats "not running" as not a failure
 * \param[in]     expired  Whether the result is considered expired (expired
 *                         results are not recorded as failed, do not trigger
 *                         block_if_unrecoverable(), and are more restricted in
 *                         setting the promoted role)
 *
 * NOTE(review): this listing has dropped a number of lines from this function:
 * the "case" labels of both switches (only "PCMK_EXEC_DONE",
 * "PCMK_EXEC_ERROR", "PCMK_OCF_OK" and the "default" labels are visible) and
 * the openings of two calls whose continuation lines remain. The exit-status
 * value handled by each unlabeled branch therefore cannot be confirmed from
 * here. Restore the missing lines from the upstream source before building.
 */
3895static void
3896remap_operation(struct action_history *history,
3897 enum action_fail_response *on_fail, bool expired)
3898{
3899 bool is_probe = false;
    // Originals are kept only for the trace message at remap_done
3900 int orig_exit_status = history->exit_status;
3901 int orig_exec_status = history->execution_status;
3902 const char *why = NULL;
3903 const char *task = history->task;
3904
3905 // Remap degraded results to their successful counterparts
3906 history->exit_status = pcmk__effective_rc(history->exit_status);
3907 if (history->exit_status != orig_exit_status) {
3908 why = "degraded result";
3909 if (!expired && (!history->node->details->shutdown
3910 || history->node->details->online)) {
3911 record_failed_op(history);
3912 }
3913 }
3914
    // Maskable probe failures (non-bundled resources) become "not running"
3915 if (!pcmk__is_bundled(history->rsc)
3916 && pcmk_xe_mask_probe_failure(history->xml)
3917 && ((history->execution_status != PCMK_EXEC_DONE)
3918 || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
3919 history->execution_status = PCMK_EXEC_DONE;
3920 history->exit_status = PCMK_OCF_NOT_RUNNING;
3921 why = "equivalent probe result";
3922 }
3923
3924 /* If the executor reported an execution status of anything but done or
3925 * error, consider that final. But for done or error, we know better whether
3926 * it should be treated as a failure or not, because we know the expected
3927 * result.
3928 */
3929 switch (history->execution_status) {
3930 case PCMK_EXEC_DONE:
3931 case PCMK_EXEC_ERROR:
3932 break;
3933
3934 // These should be treated as node-fatal
    // NOTE(review): the case label(s) for this branch are not visible
3937 remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3938 "node-fatal error");
3939 goto remap_done;
3940
3941 default:
3942 goto remap_done;
3943 }
3944
3945 is_probe = pcmk_xe_is_probe(history->xml);
3946 if (is_probe) {
3947 task = "probe";
3948 }
3949
3950 if (history->expected_exit_status < 0) {
3951 /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
3952 * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
3953 * expected exit status in the transition key, which (along with the
3954 * similar case of a corrupted transition key in the CIB) will be
3955 * reported to this function as -1. Pacemaker 2.0+ does not support
3956 * rolling upgrades from those versions or processing of saved CIB files
3957 * from those versions, so we do not need to care much about this case.
3958 */
3959 remap_because(history, &why, PCMK_EXEC_ERROR,
3960 "obsolete history format");
3961 pcmk__config_warn("Expected result not found for %s on %s "
3962 "(corrupt or obsolete CIB?)",
3963 history->key, pcmk__node_name(history->node));
3964
3965 } else if (history->exit_status == history->expected_exit_status) {
3966 remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
3967
3968 } else {
3969 remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
3970 pcmk__rsc_debug(history->rsc,
3971 "%s on %s: expected %d (%s), got %d (%s%s%s)",
3972 history->key, pcmk__node_name(history->node),
3973 history->expected_exit_status,
3974 services_ocf_exitcode_str(history->expected_exit_status),
3975 history->exit_status,
3976 services_ocf_exitcode_str(history->exit_status),
3977 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3978 pcmk__s(history->exit_reason, ""));
3979 }
3980
    /* Exit-status-specific adjustments. NOTE(review): apart from PCMK_OCF_OK
     * and default, the case labels below are not visible in this listing.
     */
3981 switch (history->exit_status) {
3982 case PCMK_OCF_OK:
3983 if (is_probe
3984 && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
3985 char *last_change_s = last_change_str(history->xml);
3986
3987 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
3988 pcmk__rsc_info(history->rsc,
3989 "Probe found %s active on %s at %s",
3990 history->rsc->id, pcmk__node_name(history->node),
3991 last_change_s);
3992 free(last_change_s);
3993 }
3994 break;
3995
3997 if (is_probe
3998 || (history->expected_exit_status == history->exit_status)
3999 || !pcmk_is_set(history->rsc->flags, pcmk_rsc_managed)) {
4000
4001 /* For probes, recurring monitors for the Stopped role, and
4002 * unmanaged resources, "not running" is not considered a
4003 * failure.
4004 */
4005 remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
4006 history->rsc->role = pcmk_role_stopped;
4007 *on_fail = pcmk_on_fail_ignore;
                // NOTE(review): start of this call is not visible
4009 "not running");
4010 }
4011 break;
4012
4014 if (is_probe
4015 && (history->exit_status != history->expected_exit_status)) {
4016 char *last_change_s = last_change_str(history->xml);
4017
4018 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
4019 pcmk__rsc_info(history->rsc,
4020 "Probe found %s active and promoted on %s at %s",
4021 history->rsc->id,
4022 pcmk__node_name(history->node), last_change_s);
4023 free(last_change_s);
4024 }
4025 if (!expired
4026 || (history->exit_status == history->expected_exit_status)) {
4027 history->rsc->role = pcmk_role_promoted;
4028 }
4029 break;
4030
4032 if (!expired) {
4033 history->rsc->role = pcmk_role_promoted;
4034 }
4035 remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
4036 break;
4037
4039 remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
4040 break;
4041
4043 {
4044 guint interval_ms = 0;
                // NOTE(review): start of the call filling interval_ms is not visible
4046 &interval_ms);
4047
                // Non-recurring actions get a hard error; recurring ones "not supported"
4048 if (interval_ms == 0) {
4049 if (!expired) {
4050 block_if_unrecoverable(history);
4051 }
4052 remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
4053 "exit status");
4054 } else {
4055 remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
4056 "exit status");
4057 }
4058 }
4059 break;
4060
4064 if (!expired) {
4065 block_if_unrecoverable(history);
4066 }
4067 remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
4068 break;
4069
4070 default:
            // An unknown exit status with a "done" execution status is a failure
4071 if (history->execution_status == PCMK_EXEC_DONE) {
4072 char *last_change_s = last_change_str(history->xml);
4073
4074 crm_info("Treating unknown exit status %d from %s of %s "
4075 "on %s at %s as failure",
4076 history->exit_status, task, history->rsc->id,
4077 pcmk__node_name(history->node), last_change_s);
4078 remap_because(history, &why, PCMK_EXEC_ERROR,
4079 "unknown exit status");
4080 free(last_change_s);
4081 }
4082 break;
4083 }
4084
4085remap_done:
    // why is non-NULL only if something above changed the result
4086 if (why != NULL) {
4087 pcmk__rsc_trace(history->rsc,
4088 "Remapped %s result from [%s: %s] to [%s: %s] "
4089 "because of %s",
4090 history->key, pcmk_exec_status_str(orig_exec_status),
4091 crm_exit_str(orig_exit_status),
4092 pcmk_exec_status_str(history->execution_status),
4093 crm_exit_str(history->exit_status), why);
4094 }
4095}
4096
/* NOTE(review): this listing has dropped several lines from this function: the
 * condition(s) opening the two outer blocks, the call whose last argument is
 * "rsc->cluster", and the first "case" label of the switch. As shown, the
 * braces do not balance. Restore the lines from the upstream source.
 *
 * From what is visible: the action's digest is compared via
 * rsc_action_digest_cmp(), and TRUE is returned for any comparison result
 * other than pcmk__digest_match or the (unlabeled) "no digest to compare"
 * case; otherwise FALSE is returned.
 */
4097// return TRUE if start or monitor last failure but parameters changed
4098static bool
4099should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4100 pcmk_resource_t *rsc, pcmk_node_t *node)
4101{
4104 /* We haven't allocated resources yet, so we can't reliably
4105 * substitute addr parameters for the REMOTE_CONTAINER_HACK.
4106 * When that's needed, defer the check until later.
4107 */
4109 rsc->cluster);
4110
4111 } else {
4112 pcmk__op_digest_t *digest_data = NULL;
4113
4114 digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4115 rsc->cluster);
4116 switch (digest_data->rc) {
4118 crm_trace("Resource %s history entry %s on %s"
4119 " has no digest to compare",
4120 rsc->id, pcmk__xe_history_key(xml_op),
4121 node->details->id);
4122 break;
4123 case pcmk__digest_match:
4124 break;
4125 default:
                // Any other digest result means the parameters changed
4126 return TRUE;
4127 }
4128 }
4129 }
4130 return FALSE;
4131}
4132
/* NOTE(review): this listing has dropped two lines from this function: the
 * final parameter line (the body uses a "scheduler" variable that is not
 * declared in the visible signature) and the statement after pe_fence_op()
 * that presumably orders "action" after "fence". Restore them from the
 * upstream source before building.
 *
 * The remote node is looked up by the connection resource's ID; nothing is
 * done if no such node is found.
 */
4133// Order action after fencing of remote node, given connection rsc
4134static void
4135order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
4137{
4138 pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);
4139
4140 if (remote_node) {
4141 pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4142 FALSE, scheduler);
4143
4145 }
4146}
4147
/*!
 * \internal
 * \brief Check whether a failure timeout should be ignored for an action
 *
 * \param[in] rsc              Resource the action history belongs to
 * \param[in] task             Action name
 * \param[in] interval_ms      Action interval in milliseconds
 * \param[in] is_last_failure  Whether the entry is a last-failure entry (only
 *                             controls whether an info message is logged)
 *
 * \return TRUE if \p rsc has a reconnect interval, the action is a recurring
 *         monitor, and a node named after \p rsc exists that has not been
 *         fenced; FALSE otherwise
 *
 * NOTE(review): the embedded listing numbers skip one line inside the "if"
 * condition below, so an additional condition may be missing here -- confirm
 * against the upstream source.
 */
4148static bool
4149should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4150 guint interval_ms, bool is_last_failure)
4151{
4152 /* Clearing failures of recurring monitors has special concerns. The
4153 * executor reports only changes in the monitor result, so if the
4154 * monitor is still active and still getting the same failure result,
4155 * that will go undetected after the failure is cleared.
4156 *
4157 * Also, the operation history will have the time when the recurring
4158 * monitor result changed to the given code, not the time when the
4159 * result last happened.
4160 *
4161 * @TODO We probably should clear such failures only when the failure
4162 * timeout has passed since the last occurrence of the failed result.
4163 * However we don't record that information. We could maybe approximate
4164 * that by clearing only if there is a more recent successful monitor or
4165 * stop result, but we don't even have that information at this point
4166 * since we are still unpacking the resource's operation history.
4167 *
4168 * This is especially important for remote connection resources with a
4169 * reconnect interval, so in that case, we skip clearing failures
4170 * if the remote node hasn't been fenced.
4171 */
4172 if (rsc->remote_reconnect_ms
4174 && (interval_ms != 0)
4175 && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4176
        // The remote node is looked up under the connection resource's ID
4177 pcmk_node_t *remote_node = pcmk_find_node(rsc->cluster, rsc->id);
4178
4179 if (remote_node && !remote_node->details->remote_was_fenced) {
4180 if (is_last_failure) {
4181 crm_info("Waiting to clear monitor failure for remote node %s"
4182 " until fencing has occurred", rsc->id);
4183 }
4184 return TRUE;
4185 }
4186 }
4187 return FALSE;
4188}
4189
/*!
 * \internal
 * \brief Check whether an action history entry has expired
 *
 * Besides computing the result, this may update the scheduler's recheck time
 * (for fail count expiration) and may schedule clearing of the resource's
 * fail count, ordered after remote fencing in the reconnect-interval case.
 *
 * \param[in,out] history  Parsed action history entry
 *
 * \return true if the entry is considered expired, otherwise false ("not
 *         installed" execution status never expires; probe results listed in
 *         the final switch are never expired either)
 *
 * NOTE(review): this listing has dropped several lines from this function: the
 * second operand of the failure-timeout "if" (which fills \c last_run), one
 * argument of the first pe_get_failcount() call, one argument of the
 * pcmk_is_set() call before order_after_remote_fencing(), and some "case"
 * labels in the final switch. Restore them from the upstream source.
 */
4208static bool
4209check_operation_expiry(struct action_history *history)
4210{
4211 bool expired = false;
4212 bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
4213 time_t last_run = 0;
4214 int unexpired_fail_count = 0;
4215 const char *clear_reason = NULL;
4216
4217 if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
4218 pcmk__rsc_trace(history->rsc,
4219 "Resource history entry %s on %s is not expired: "
4220 "Not Installed does not expire",
4221 history->id, pcmk__node_name(history->node));
4222 return false; // "Not installed" must always be cleared manually
4223 }
4224
4225 if ((history->rsc->failure_timeout > 0)
        // NOTE(review): start of the call filling last_run is not visible
4227 &last_run) == 0)) {
4228
4229 /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a
4230 * timestamp
4231 */
4232
4233 time_t now = get_effective_time(history->rsc->cluster);
4234 time_t last_failure = 0;
4235
4236 // Is this particular operation history older than the failure timeout?
4237 if ((now >= (last_run + history->rsc->failure_timeout))
4238 && !should_ignore_failure_timeout(history->rsc, history->task,
4239 history->interval_ms,
4240 is_last_failure)) {
4241 expired = true;
4242 }
4243
4244 // Does the resource as a whole have an unexpired fail count?
4245 unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
4246 &last_failure,
4248 history->xml);
4249
4250 // Update scheduler recheck time according to *last* failure
4251 crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
4252 " last-failure@%lld",
4253 history->id, (long long) last_run, (expired? "" : "not "),
4254 (long long) now, unexpired_fail_count,
4255 history->rsc->failure_timeout, (long long) last_failure);
        // last_failure now becomes the first second after the timeout elapses
4256 last_failure += history->rsc->failure_timeout + 1;
4257 if (unexpired_fail_count && (now < last_failure)) {
4258 pe__update_recheck_time(last_failure, history->rsc->cluster,
4259 "fail count expiration");
4260 }
4261 }
4262
4263 if (expired) {
4264 if (pe_get_failcount(history->node, history->rsc, NULL,
4265 pcmk__fc_default, history->xml)) {
4266 // There is a fail count ignoring timeout
4267
4268 if (unexpired_fail_count == 0) {
4269 // There is no fail count considering timeout
4270 clear_reason = "it expired";
4271
4272 } else {
4273 /* This operation is old, but there is an unexpired fail count.
4274 * In a properly functioning cluster, this should only be
4275 * possible if this operation is not a failure (otherwise the
4276 * fail count should be expired too), so this is really just a
4277 * failsafe.
4278 */
4279 pcmk__rsc_trace(history->rsc,
4280 "Resource history entry %s on %s is not "
4281 "expired: Unexpired fail count",
4282 history->id, pcmk__node_name(history->node));
4283 expired = false;
4284 }
4285
4286 } else if (is_last_failure
4287 && (history->rsc->remote_reconnect_ms != 0)) {
4288 /* Clear any expired last failure when reconnect interval is set,
4289 * even if there is no fail count.
4290 */
4291 clear_reason = "reconnect interval is set";
4292 }
4293 }
4294
    // An unexpired last-failure entry is still cleared if parameters changed
4295 if (!expired && is_last_failure
4296 && should_clear_for_param_change(history->xml, history->task,
4297 history->rsc, history->node)) {
4298 clear_reason = "resource parameters have changed";
4299 }
4300
4301 if (clear_reason != NULL) {
4302 pcmk_action_t *clear_op = NULL;
4303
4304 // Schedule clearing of the fail count
4305 clear_op = pe__clear_failcount(history->rsc, history->node,
4306 clear_reason, history->rsc->cluster);
4307
        // NOTE(review): the flag argument of this pcmk_is_set() is not visible
4308 if (pcmk_is_set(history->rsc->cluster->flags,
4310 && (history->rsc->remote_reconnect_ms != 0)) {
4311 /* If we're clearing a remote connection due to a reconnect
4312 * interval, we want to wait until any scheduled fencing
4313 * completes.
4314 *
4315 * We could limit this to remote_node->details->unclean, but at
4316 * this point, that's always true (it won't be reliable until
4317 * after unpack_node_history() is done).
4318 */
4319 crm_info("Clearing %s failure will wait until any scheduled "
4320 "fencing of %s completes",
4321 history->task, history->rsc->id);
4322 order_after_remote_fencing(clear_op, history->rsc,
4323 history->rsc->cluster);
4324 }
4325 }
4326
    // A non-recurring monitor (interval 0) is treated as a probe here
4327 if (expired && (history->interval_ms == 0)
4328 && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
4329 switch (history->exit_status) {
4330 case PCMK_OCF_OK:
4333 case PCMK_OCF_DEGRADED:
4335 // Don't expire probes that return these values
4336 pcmk__rsc_trace(history->rsc,
4337 "Resource history entry %s on %s is not "
4338 "expired: Probe result",
4339 history->id, pcmk__node_name(history->node));
4340 expired = false;
4341 break;
4342 }
4343 }
4344
4345 return expired;
4346}
4347
4348int
4349pe__target_rc_from_xml(const xmlNode *xml_op)
4350{
4351 int target_rc = 0;
4352 const char *key = crm_element_value(xml_op, PCMK__XA_TRANSITION_KEY);
4353
4354 if (key == NULL) {
4355 return -1;
4356 }
4357 decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4358 return target_rc;
4359}
4360
/*!
 * \internal
 * \brief Update a resource's state for an action result
 *
 * The resource's role is set according to the exit status and the action
 * name. If the result is one that clears past failures, *on_fail may then be
 * reset to pcmk_on_fail_ignore, depending on its current value.
 *
 * \param[in,out] history       Parsed action history entry
 * \param[in]     exit_status   Exit status to base the new state on (the
 *                              caller visible in this file passes the expected
 *                              exit status when masking a probe failure)
 * \param[in]     last_failure  Resource's last-failure history entry, if any
 * \param[in,out] on_fail       Failure response tracked so far
 *
 * NOTE(review): the final switch in this listing has lost several "case"
 * labels and the openings of two calls (only their string continuation lines
 * remain), so which failure responses fall into which branch cannot be fully
 * confirmed from here. Restore them from the upstream source before building.
 */
4370static void
4371update_resource_state(struct action_history *history, int exit_status,
4372 const xmlNode *last_failure,
4373 enum action_fail_response *on_fail)
4374{
4375 bool clear_past_failure = false;
4376
4377 if ((exit_status == PCMK_OCF_NOT_INSTALLED)
4378 || (!pcmk__is_bundled(history->rsc)
4379 && pcmk_xe_mask_probe_failure(history->xml))) {
4380 history->rsc->role = pcmk_role_stopped;
4381
4382 } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
4383 clear_past_failure = true;
4384
4385 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
4386 pcmk__str_none)) {
        // A monitor clears a past failure only of the same operation key
4387 if ((last_failure != NULL)
4388 && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
4389 pcmk__str_none)) {
4390 clear_past_failure = true;
4391 }
4392 if (history->rsc->role < pcmk_role_started) {
4393 set_active(history->rsc);
4394 }
4395
4396 } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
4397 history->rsc->role = pcmk_role_started;
4398 clear_past_failure = true;
4399
4400 } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
4401 history->rsc->role = pcmk_role_stopped;
4402 clear_past_failure = true;
4403
4404 } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
4405 pcmk__str_none)) {
4406 history->rsc->role = pcmk_role_promoted;
4407 clear_past_failure = true;
4408
4409 } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
4410 pcmk__str_none)) {
4411 if (*on_fail == pcmk_on_fail_demote) {
4412 /* Demote clears an error only if
4413 * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE
4414 */
4415 clear_past_failure = true;
4416 }
4417 history->rsc->role = pcmk_role_unpromoted;
4418
4419 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
4420 pcmk__str_none)) {
4421 history->rsc->role = pcmk_role_started;
4422 clear_past_failure = true;
4423
4424 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
4425 pcmk__str_none)) {
4426 unpack_migrate_to_success(history);
4427
4428 } else if (history->rsc->role < pcmk_role_started) {
4429 pcmk__rsc_trace(history->rsc, "%s active on %s",
4430 history->rsc->id, pcmk__node_name(history->node));
4431 set_active(history->rsc);
4432 }
4433
4434 if (!clear_past_failure) {
4435 return;
4436 }
4437
    // Decide whether the completed action resets the tracked failure response
4438 switch (*on_fail) {
4439 case pcmk_on_fail_stop:
4440 case pcmk_on_fail_ban:
4443 pcmk__rsc_trace(history->rsc,
4444 "%s (%s) is not cleared by a completed %s",
4445 history->rsc->id, pcmk_on_fail_text(*on_fail),
4446 history->task);
4447 break;
4448
4449 case pcmk_on_fail_block:
4454 *on_fail = pcmk_on_fail_ignore;
            // NOTE(review): start of this call is not visible in this listing
4456 "clear past failures");
4457 break;
4458
4460 if (history->rsc->remote_reconnect_ms == 0) {
4461 /* With no reconnect interval, the connection is allowed to
4462 * start again after the remote node is fenced and
4463 * completely stopped. (With a reconnect interval, we wait
4464 * for the failure to be cleared entirely before attempting
4465 * to reconnect.)
4466 */
4467 *on_fail = pcmk_on_fail_ignore;
                // NOTE(review): start of this call is not visible in this listing
4469 "clear past failures and reset remote");
4470 }
4471 break;
4472 }
4473}
4474
/*!
 * \internal
 * \brief Check whether a given action history entry can affect resource state
 *
 * \param[in] history  Parsed action history entry
 *
 * \return true unless the action is a notify or meta-data action (the
 *         compiled "#else" branch), in which case false
 *
 * NOTE(review): the disabled "#if 0" branch is incomplete in this listing
 * (some of its argument lines are not visible); it is not compiled.
 */
4483static inline bool
4484can_affect_state(struct action_history *history)
4485{
4486#if 0
4487 /* @COMPAT It might be better to parse only actions we know we're interested
4488 * in, rather than exclude a couple we don't. However that would be a
4489 * behavioral change that should be done at a major or minor series release.
4490 * Currently, unknown operations can affect whether a resource is considered
4491 * active and/or failed.
4492 */
4493 return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
4497 "asyncmon", NULL);
4498#else
4499 return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY,
4500 PCMK_ACTION_META_DATA, NULL);
4501#endif
4502}
4503
/*!
 * \internal
 * \brief Unpack execution/exit status and exit reason from a history entry
 *
 * \param[in,out] history  Action history entry to unpack (execution_status,
 *                         exit_status and exit_reason are filled in)
 *
 * \return pcmk_rc_unpack_error if the execution status is absent, out of
 *         range, or PCMK_EXEC_CANCELLED; otherwise pcmk_rc_ok
 * \note A missing or out-of-range exit status is not an error here: it is
 *       replaced by CRM_EX_ERROR (the stricter handling is under "#if 0").
 *
 * NOTE(review): in both pcmk__config_err() calls, the line carrying the
 * attribute-name argument of the inner crm_element_value() call is not
 * visible in this listing. Restore it from the upstream source.
 */
4512static int
4513unpack_action_result(struct action_history *history)
4514{
4515 if ((crm_element_value_int(history->xml, PCMK__XA_OP_STATUS,
4516 &(history->execution_status)) < 0)
4517 || (history->execution_status < PCMK_EXEC_PENDING)
4518 || (history->execution_status > PCMK_EXEC_MAX)
4519 || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4520 pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4521 "with invalid " PCMK__XA_OP_STATUS " '%s'",
4522 history->id, history->rsc->id,
4523 pcmk__node_name(history->node),
4524 pcmk__s(crm_element_value(history->xml,
4526 ""));
4527 return pcmk_rc_unpack_error;
4528 }
4529 if ((crm_element_value_int(history->xml, PCMK__XA_RC_CODE,
4530 &(history->exit_status)) < 0)
4531 || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4532#if 0
4533 /* @COMPAT We should ignore malformed entries, but since that would
4534 * change behavior, it should be done at a major or minor series
4535 * release.
4536 */
4537 pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4538 "with invalid " PCMK__XA_RC_CODE " '%s'",
4539 history->id, history->rsc->id,
4540 pcmk__node_name(history->node),
4541 pcmk__s(crm_element_value(history->xml,
4543 ""));
4544 return pcmk_rc_unpack_error;
4545#else
        // Current behavior: treat a malformed exit status as a generic error
4546 history->exit_status = CRM_EX_ERROR;
4547#endif
4548 }
    // exit_reason points into the XML (may be NULL if the attribute is unset)
4549 history->exit_reason = crm_element_value(history->xml, PCMK_XA_EXIT_REASON);
4550 return pcmk_rc_ok;
4551}
4552
/*!
 * \internal
 * \brief Process an action history entry whose result expired
 *
 * \param[in,out] history           Parsed action history entry
 * \param[in]     orig_exit_status  Action exit status before remapping
 *
 * \return pcmk_rc_ok in the cases logged below as "Ignoring ..." or
 *         "Rescheduling ..." (expired masked probe failure, expired failure of
 *         a non-recurring action, expired failure of a recurring action on an
 *         online and clean node); pcmk_rc_undetermined otherwise, including
 *         when the result matched what was expected -- presumably the caller
 *         keeps processing the entry in that case, TODO confirm
 *
 * NOTE(review): in the rescheduling branch, the opening of the call whose last
 * argument is "calculated-failure-timeout" is missing from this listing.
 * Restore it from the upstream source before building.
 */
4563static int
4564process_expired_result(struct action_history *history, int orig_exit_status)
4565{
4566 if (!pcmk__is_bundled(history->rsc)
4567 && pcmk_xe_mask_probe_failure(history->xml)
4568 && (orig_exit_status != history->expected_exit_status)) {
4569
        // An expired masked probe failure leaves the resource state unknown
4570 if (history->rsc->role <= pcmk_role_stopped) {
4571 history->rsc->role = pcmk_role_unknown;
4572 }
4573 crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
4574 "Masked failure expired",
4575 history->id, history->rsc->id,
4576 pcmk__node_name(history->node));
4577 return pcmk_rc_ok;
4578 }
4579
4580 if (history->exit_status == history->expected_exit_status) {
4581 return pcmk_rc_undetermined; // Only failures expire
4582 }
4583
4584 if (history->interval_ms == 0) {
4585 crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
4586 "Expired failure",
4587 history->id, history->task, history->rsc->id,
4588 pcmk__node_name(history->node));
4589 return pcmk_rc_ok;
4590 }
4591
4592 if (history->node->details->online && !history->node->details->unclean) {
4593 /* Reschedule the recurring action. schedule_cancel() won't work at
4594 * this stage, so as a hacky workaround, forcibly change the restart
4595 * digest so pcmk__check_action_config() does what we want later.
4596 *
4597 * @TODO We should skip this if there is a newer successful monitor.
4598 * Also, this causes rescheduling only if the history entry
4599 * has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure
4600 * scheduler regression test doesn't, but that may not be a
4601 * realistic scenario in production).
4602 */
4603 crm_notice("Rescheduling %s-interval %s of %s on %s "
4604 "after failure expired",
4605 pcmk__readable_interval(history->interval_ms), history->task,
4606 history->rsc->id, pcmk__node_name(history->node));
        // NOTE(review): start of this call is not visible in this listing
4608 "calculated-failure-timeout");
4609 return pcmk_rc_ok;
4610 }
4611
4612 return pcmk_rc_undetermined;
4613}
4614
4624static void
4625mask_probe_failure(struct action_history *history, int orig_exit_status,
4626 const xmlNode *last_failure,
4627 enum action_fail_response *on_fail)
4628{
4629 pcmk_resource_t *ban_rsc = history->rsc;
4630
4631 if (!pcmk_is_set(history->rsc->flags, pcmk_rsc_unique)) {
4632 ban_rsc = uber_parent(history->rsc);
4633 }
4634
4635 crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
4636 services_ocf_exitcode_str(orig_exit_status), history->rsc->id,
4637 pcmk__node_name(history->node));
4638 update_resource_state(history, history->expected_exit_status, last_failure,
4639 on_fail);
4640 crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname);
4641
4642 record_failed_op(history);
4643 resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY,
4644 "masked-probe-failure", history->rsc->cluster);
4645}
4646
/*!
 * \internal
 * \brief Check whether a last_failure entry supersedes a history entry
 *
 * The last_failure entry qualifies only if it exists, names the same
 * operation (PCMK_XA_OPERATION) and interval (PCMK_META_INTERVAL) as the
 * history entry, and its parsed change value is not less than the history
 * entry's.
 *
 * NOTE(review): this listing skips embedded line numbers 4683 and 4686, so the
 * start of the final condition and the attribute name(s) being parsed are not
 * visible here. \c this_change is presumably parsed from \p history->xml;
 * confirm against the original file.
 *
 * \param[in] history       Parsed action history entry
 * \param[in] last_failure  Resource's last_failure entry, or NULL if none
 *
 * \return true if \p last_failure is for the same action and interval as
 *         \p history and is known to be at least as new, otherwise false
 */
4659static bool
4660failure_is_newer(const struct action_history *history,
4661 const xmlNode *last_failure)
4662{
4663 guint failure_interval_ms = 0U;
4664 long long failure_change = 0LL;
4665 long long this_change = 0LL;
4666
4667 if (last_failure == NULL) {
4668 return false; // Resource has no last_failure entry
4669 }
4670
4671 if (!pcmk__str_eq(history->task,
4672 crm_element_value(last_failure, PCMK_XA_OPERATION),
4673 pcmk__str_none)) {
4674 return false; // last_failure is for different action
4675 }
4676
4677 if ((crm_element_value_ms(last_failure, PCMK_META_INTERVAL,
4678 &failure_interval_ms) != pcmk_ok)
4679 || (history->interval_ms != failure_interval_ms)) {
4680 return false; // last_failure is for action with different interval
4681 }
4682
/* Either value failing to parse, or the failure value being smaller, means
 * the failure cannot be shown to be newer.
 */
4684 &this_change, 0LL) != pcmk_rc_ok)
4685 || (pcmk__scan_ll(crm_element_value(last_failure,
4687 &failure_change, 0LL) != pcmk_rc_ok)
4688 || (failure_change < this_change)) {
4689 return false; // Failure is not known to be newer
4690 }
4691
4692 return true;
4693}
4694
/*!
 * \internal
 * \brief Update resource state for a history entry whose action is pending
 *
 * Nothing is done if failure_is_newer() reports that \p last_failure
 * supersedes the entry. Otherwise a pending start marks the resource active,
 * a pending promote sets its role to pcmk_role_promoted, and a pending
 * migrate_to on an unclean node schedules a stop on the migration target (if
 * that node can be found). Finally, the first pending non-probe action seen
 * for the resource is remembered in \c pending_task and \c pending_node.
 *
 * NOTE(review): this listing skips embedded line numbers 4720 and 4735 (a
 * statement in the start branch, and the attribute-name argument used to look
 * up the migration target). Restore them from the original file before
 * editing this function.
 *
 * \param[in,out] history       Parsed action history entry
 * \param[in]     last_failure  Resource's last_failure entry, if known
 */
4702static void
4703process_pending_action(struct action_history *history,
4704 const xmlNode *last_failure)
4705{
4706 /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
4707 * and there might be a RSC_monitor_INTERVAL entry with the last successful
4708 * or pending result.
4709 *
4710 * If last_failure contains the failure of the pending recurring monitor
4711 * we're processing here, and is newer, the action is no longer pending.
4712 * (Pending results have call ID -1, which sorts last, so the last failure
4713 * if any should be known.)
4714 */
4715 if (failure_is_newer(history, last_failure)) {
4716 return;
4717 }
4718
4719 if (strcmp(history->task, PCMK_ACTION_START) == 0) {
4721 set_active(history->rsc);
4722
4723 } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4724 history->rsc->role = pcmk_role_promoted;
4725
4726 } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4727 && history->node->details->unclean) {
4728 /* A migrate_to action is pending on a unclean source, so force a stop
4729 * on the target.
4730 */
4731 const char *migrate_target = NULL;
4732 pcmk_node_t *target = NULL;
4733
4734 migrate_target = crm_element_value(history->xml,
4736 target = pcmk_find_node(history->rsc->cluster, migrate_target);
4737 if (target != NULL) {
4738 stop_action(history->rsc, target, FALSE);
4739 }
4740 }
4741
4742 if (history->rsc->pending_task != NULL) {
4743 /* There should never be multiple pending actions, but as a failsafe,
4744 * just remember the first one processed for display purposes.
4745 */
4746 return;
4747 }
4748
4749 if (pcmk_is_probe(history->task, history->interval_ms)) {
4750 /* Pending probes are currently never displayed, even if pending
4751 * operations are requested. If we ever want to change that,
4752 * enable the below and the corresponding part of
4753 * native.c:native_pending_task().
4754 */
4755#if 0
4756 history->rsc->pending_task = strdup("probe");
4757 history->rsc->pending_node = history->node;
4758#endif
4759 } else {
// NOTE(review): the strdup() result is stored without a NULL check
4760 history->rsc->pending_task = strdup(history->task);
4761 history->rsc->pending_node = history->node;
4762 }
4763}
4764
/*!
 * \internal
 * \brief Unpack one resource action history entry and apply it to the resource
 *
 * The entry is ignored (after logging) if it has no XML ID or no
 * PCMK_XA_OPERATION, if can_affect_state() rejects it, or if
 * unpack_action_result() fails. Otherwise the function:
 * -# checks whether the result has expired and remaps it
 * -# lets process_expired_result() or mask_probe_failure() finish the entry
 *    when they apply
 * -# dispatches on the execution status: pending and done results are handled
 *    by their helpers, and the remaining statuses reach the failure handling
 *    after the switch
 * -# either pretends the failure succeeded (on-fail is ignore, or
 *    restart-container for a stop) or calls unpack_rsc_op_failure()
 *
 * NOTE(review): this listing skips a number of embedded line numbers inside
 * this function (for example 4858, 4872, 4878-4881, 4888-4889, 4894-4897,
 * 4923, 4949, 4959 and 4968), including several \c case labels and the heads
 * of some calls. Restore them from the original file before editing.
 *
 * \param[in,out] rsc           Resource that the history entry is for
 * \param[in,out] node          Node that the history entry is for
 * \param[in,out] xml_op        History entry XML (the node name may be added
 *                              to it)
 * \param[in,out] last_failure  Passed on to the state-update and
 *                              failure-handling helpers
 * \param[in,out] on_fail       Failure handling value shared with the helpers;
 *                              updated in some branches below
 */
4765static void
4766unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4767 xmlNode **last_failure, enum action_fail_response *on_fail)
4768{
4769 int old_rc = 0;
4770 bool expired = false;
4771 pcmk_resource_t *parent = rsc;
4772 enum rsc_role_e fail_role = pcmk_role_unknown;
4773 enum action_fail_response failure_strategy = pcmk_on_fail_restart;
4774
4775 struct action_history history = {
4776 .rsc = rsc,
4777 .node = node,
4778 .xml = xml_op,
4779 .execution_status = PCMK_EXEC_UNKNOWN,
4780 };
4781
4782 CRM_CHECK(rsc && node && xml_op, return);
4783
4784 history.id = pcmk__xe_id(xml_op);
4785 if (history.id == NULL) {
4786 pcmk__config_err("Ignoring resource history entry for %s on %s "
4787 "without ID", rsc->id, pcmk__node_name(node));
4788 return;
4789 }
4790
4791 // Task and interval
4792 history.task = crm_element_value(xml_op, PCMK_XA_OPERATION);
4793 if (history.task == NULL) {
4794 pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4795 "without " PCMK_XA_OPERATION,
4796 history.id, rsc->id, pcmk__node_name(node));
4797 return;
4798 }
4799 crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &(history.interval_ms));
4800 if (!can_affect_state(&history)) {
4801 pcmk__rsc_trace(rsc,
4802 "Ignoring resource history entry %s for %s on %s "
4803 "with irrelevant action '%s'",
4804 history.id, rsc->id, pcmk__node_name(node),
4805 history.task);
4806 return;
4807 }
4808
4809 if (unpack_action_result(&history) != pcmk_rc_ok) {
4810 return; // Error already logged
4811 }
4812
4813 history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4814 history.key = pcmk__xe_history_key(xml_op);
4815 crm_element_value_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id));
4816
4817 pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4818 history.id, history.task, history.call_id,
4819 pcmk__node_name(node),
4820 pcmk_exec_status_str(history.execution_status),
4821 crm_exit_str(history.exit_status));
4822
4823 if (node->details->unclean) {
4824 pcmk__rsc_trace(rsc,
4825 "%s is running on %s, which is unclean (further action "
4826 "depends on value of stop's on-fail attribute)",
4827 rsc->id, pcmk__node_name(node));
4828 }
4829
// Save the exit status as unpacked, before remap_operation() is called
4830 expired = check_operation_expiry(&history);
4831 old_rc = history.exit_status;
4832
4833 remap_operation(&history, on_fail, expired);
4834
4835 if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4836 goto done;
4837 }
4838
4839 if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4840 mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4841 goto done;
4842 }
4843
// parent is used only in the "Preventing ... from restarting" messages below
4844 if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
4845 parent = uber_parent(rsc);
4846 }
4847
4848 switch (history.execution_status) {
4849 case PCMK_EXEC_PENDING:
4850 process_pending_action(&history, *last_failure);
4851 goto done;
4852
4853 case PCMK_EXEC_DONE:
4854 update_resource_state(&history, history.exit_status, *last_failure,
4855 on_fail);
4856 goto done;
4857
4859 unpack_failure_handling(&history, &failure_strategy, &fail_role);
4860 if (failure_strategy == pcmk_on_fail_ignore) {
4861 crm_warn("Cannot ignore failed %s of %s on %s: "
4862 "Resource agent doesn't exist "
4863 CRM_XS " status=%d rc=%d id=%s",
4864 history.task, rsc->id, pcmk__node_name(node),
4865 history.execution_status, history.exit_status,
4866 history.id);
4867 /* Also for printing it as "FAILED" by marking it as
4868 * pcmk_rsc_failed later
4869 */
4870 *on_fail = pcmk_on_fail_ban;
4871 }
4873 "hard-error", rsc->cluster);
4874 unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4875 last_failure, on_fail);
4876 goto done;
4877
4879 if (pcmk__is_pacemaker_remote_node(node)
4882 /* We should never get into a situation where a managed remote
4883 * connection resource is considered OK but a resource action
4884 * behind the connection gets a "not connected" status. But as a
4885 * fail-safe in case a bug or unusual circumstances do lead to
4886 * that, ensure the remote connection is considered failed.
4887 */
4890 }
4891 break; // Not done, do error handling
4892
4893 case PCMK_EXEC_ERROR:
4896 case PCMK_EXEC_TIMEOUT:
4898 case PCMK_EXEC_INVALID:
4899 break; // Not done, do error handling
4900
4901 default: // No other value should be possible at this point
4902 break;
4903 }
4904
// Failure handling for every status that broke out of the switch
4905 unpack_failure_handling(&history, &failure_strategy, &fail_role);
4906 if ((failure_strategy == pcmk_on_fail_ignore)
4907 || ((failure_strategy == pcmk_on_fail_restart_container)
4908 && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
4909
4910 char *last_change_s = last_change_str(xml_op);
4911
4912 crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
4913 CRM_XS " %s",
4914 history.task, services_ocf_exitcode_str(history.exit_status),
4915 (pcmk__str_empty(history.exit_reason)? "" : ": "),
4916 pcmk__s(history.exit_reason, ""), rsc->id,
4917 pcmk__node_name(node), last_change_s, history.id);
4918 free(last_change_s);
4919
4920 update_resource_state(&history, history.expected_exit_status,
4921 *last_failure, on_fail);
4922 crm_xml_add(xml_op, PCMK_XA_UNAME, node->details->uname);
4924
4925 record_failed_op(&history);
4926
4927 if ((failure_strategy == pcmk_on_fail_restart_container)
4928 && cmp_on_fail(*on_fail, pcmk_on_fail_restart) <= 0) {
4929 *on_fail = failure_strategy;
4930 }
4931
4932 } else {
4933 unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4934 last_failure, on_fail);
4935
4936 if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
4937 uint8_t log_level = LOG_ERR;
4938
4939 if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
4940 log_level = LOG_NOTICE;
4941 }
4942 do_crm_log(log_level,
4943 "Preventing %s from restarting on %s because "
4944 "of hard failure (%s%s%s) " CRM_XS " %s",
4945 parent->id, pcmk__node_name(node),
4946 services_ocf_exitcode_str(history.exit_status),
4947 (pcmk__str_empty(history.exit_reason)? "" : ": "),
4948 pcmk__s(history.exit_reason, ""), history.id);
4950 "hard-error", rsc->cluster);
4951
4952 } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
4953 pcmk__sched_err("Preventing %s from restarting anywhere because "
4954 "of fatal failure (%s%s%s) " CRM_XS " %s",
4955 parent->id,
4956 services_ocf_exitcode_str(history.exit_status),
4957 (pcmk__str_empty(history.exit_reason)? "" : ": "),
4958 pcmk__s(history.exit_reason, ""), history.id);
4960 "fatal-error", rsc->cluster);
4961 }
4962 }
4963
4964done:
4965 pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
4966 rsc->id, pcmk__node_name(node), history.id,
4967 pcmk_role_text(rsc->role),
4969}
4970
/*!
 * \internal
 * \brief Set up a node's attributes from its XML and from cluster-wide data
 *
 * What is visible in this listing: the node is recorded as the DC when its ID
 * matches \c scheduler->dc_uuid; the cluster name is looked up in
 * \c scheduler->config_hash; \c node->details->attrs and
 * \c node->details->utilization are each passed to an unpacking call; and if
 * the node has no CRM_ATTR_SITE_NAME attribute, one is derived from its
 * "site-name" attribute or, failing that, from the cluster name.
 *
 * NOTE(review): this listing skips many embedded line numbers in this
 * function (4973, 4985, 4988, 4992-4993, 4995-4996, 5000, 5002, 5006, 5010,
 * 5017, 5020 and 5025), including the final parameter line and the heads of
 * most calls. The descriptions below are limited to what remains visible;
 * restore the missing lines from the original file before editing.
 *
 * \param[in]     xml_obj    Node XML (its use is on lines missing from this
 *                           listing; presumably the source of the unpacked
 *                           attribute blocks -- confirm)
 * \param[in,out] node       Node whose details are updated
 * \param[in]     overwrite  Forwarded to the call that fills
 *                           \c node->details->attrs
 */
4971static void
4972add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
4974{
4975 const char *cluster_name = NULL;
4976
4977 pe_rule_eval_data_t rule_data = {
4978 .node_hash = NULL,
4979 .now = scheduler->now,
4980 .match_data = NULL,
4981 .rsc_data = NULL,
4982 .op_data = NULL
4983 };
4984
4986 CRM_ATTR_UNAME, node->details->uname);
4987
4989 if (pcmk__str_eq(node->details->id, scheduler->dc_uuid, pcmk__str_casei)) {
4990 scheduler->dc_node = node;
4991 node->details->is_dc = TRUE;
4994 } else {
4997 }
4998
4999 cluster_name = g_hash_table_lookup(scheduler->config_hash,
5001 if (cluster_name) {
5003 cluster_name);
5004 }
5005
5007 node->details->attrs, NULL, overwrite,
5008 scheduler);
5009
5011 node->details->utilization, NULL,
5012 FALSE, scheduler);
5013
// Only derive a site name if the node does not already have one
5014 if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL,
5015 pcmk__rsc_node_current) == NULL) {
5016 const char *site_name = pcmk__node_attr(node, "site-name", NULL,
5018
5019 if (site_name) {
5021 CRM_ATTR_SITE_NAME, site_name);
5022
5023 } else if (cluster_name) {
5024 /* Default to cluster-name if unset */
5026 CRM_ATTR_SITE_NAME, cluster_name);
5027 }
5028 }
5029}
5030
5031static GList *
5032extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
5033{
5034 int counter = -1;
5035 int stop_index = -1;
5036 int start_index = -1;
5037
5038 xmlNode *rsc_op = NULL;
5039
5040 GList *gIter = NULL;
5041 GList *op_list = NULL;
5042 GList *sorted_op_list = NULL;
5043
5044 /* extract operations */
5045 op_list = NULL;
5046 sorted_op_list = NULL;
5047
5048 for (rsc_op = pcmk__xe_first_child(rsc_entry, NULL, NULL, NULL);
5049 rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
5050
5051 if (pcmk__xe_is(rsc_op, PCMK__XE_LRM_RSC_OP)) {
5052 crm_xml_add(rsc_op, PCMK_XA_RESOURCE, rsc);
5053 crm_xml_add(rsc_op, PCMK_XA_UNAME, node);
5054 op_list = g_list_prepend(op_list, rsc_op);
5055 }
5056 }
5057
5058 if (op_list == NULL) {
5059 /* if there are no operations, there is nothing to do */
5060 return NULL;
5061 }
5062
5063 sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
5064
5065 /* create active recurring operations as optional */
5066 if (active_filter == FALSE) {
5067 return sorted_op_list;
5068 }
5069
5070 op_list = NULL;
5071
5072 calculate_active_ops(sorted_op_list, &start_index, &stop_index);
5073
5074 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
5075 xmlNode *rsc_op = (xmlNode *) gIter->data;
5076
5077 counter++;
5078
5079 if (start_index < stop_index) {
5080 crm_trace("Skipping %s: not active", pcmk__xe_id(rsc_entry));
5081 break;
5082
5083 } else if (counter < start_index) {
5084 crm_trace("Skipping %s: old", pcmk__xe_id(rsc_op));
5085 continue;
5086 }
5087 op_list = g_list_append(op_list, rsc_op);
5088 }
5089
5090 g_list_free(sorted_op_list);
5091 return op_list;
5092}
5093
/*!
 * \brief Collect resource action history entries from node state XML
 *
 * Walk the PCMK__XE_NODE_STATE children of \c status. For each one whose
 * PCMK_XA_UNAME passes the \p node filter and names a known node, refresh the
 * node's online status (this updates the node object), and, if the online
 * check passes, append the result of extract_operations() for every
 * PCMK__XE_LRM_RESOURCE whose ID passes the \p rsc filter.
 *
 * NOTE(review): this listing skips embedded line numbers 5096, 5102, 5134 and
 * 5143, so the final parameter line (which presumably declares \c scheduler),
 * the initialisation of \c status, the second half of the online check and
 * part of the XML lookup are not visible. Restore them from the original file
 * before editing.
 *
 * \param[in] rsc            If not NULL, only return entries for the resource
 *                           with this ID (compared with pcmk__str_casei)
 * \param[in] node           If not NULL, only return entries from the node
 *                           with this name (compared with pcmk__str_casei)
 * \param[in] active_filter  Passed to extract_operations() for each resource
 *
 * \return List of matching history entries (NULL if none are found or
 *         \c status is NULL)
 */
5094GList *
5095find_operations(const char *rsc, const char *node, gboolean active_filter,
5097{
5098 GList *output = NULL;
5099 GList *intermediate = NULL;
5100
5101 xmlNode *tmp = NULL;
5103 NULL, NULL);
5104
5105 pcmk_node_t *this_node = NULL;
5106
5107 xmlNode *node_state = NULL;
5108
5109 CRM_CHECK(status != NULL, return NULL);
5110
5111 for (node_state = pcmk__xe_first_child(status, NULL, NULL, NULL);
5112 node_state != NULL; node_state = pcmk__xe_next(node_state)) {
5113
5114 if (pcmk__xe_is(node_state, PCMK__XE_NODE_STATE)) {
5115 const char *uname = crm_element_value(node_state, PCMK_XA_UNAME);
5116
5117 if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
5118 continue;
5119 }
5120
5121 this_node = pcmk_find_node(scheduler, uname);
5122 if(this_node == NULL) {
// Node state refers to an unknown node: log the assertion and skip it
5123 CRM_LOG_ASSERT(this_node != NULL);
5124 continue;
5125
5126 } else if (pcmk__is_pacemaker_remote_node(this_node)) {
5127 determine_remote_online_status(scheduler, this_node);
5128
5129 } else {
5130 determine_online_status(node_state, this_node, scheduler);
5131 }
5132
5133 if (this_node->details->online
5135 /* offline nodes run no resources...
5136 * unless stonith is enabled in which case we need to
5137 * make sure rsc start events happen after the stonith
5138 */
5139 xmlNode *lrm_rsc = NULL;
5140
5141 tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL,
5142 NULL);
5144 NULL);
5145
5146 for (lrm_rsc = pcmk__xe_first_child(tmp, NULL, NULL, NULL);
5147 lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) {
5148
5149 if (pcmk__xe_is(lrm_rsc, PCMK__XE_LRM_RESOURCE)) {
5150 const char *rsc_id = crm_element_value(lrm_rsc,
5151 PCMK_XA_ID);
5152
5153 if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) {
5154 continue;
5155 }
5156
5157 intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
5158 output = g_list_concat(output, intermediate);
5159 }
5160 }
5161 }
5162 }
5163 }
5164
5165 return output;
5166}
@ pcmk__ar_first_implies_then
bool pcmk_xe_mask_probe_failure(const xmlNode *xml_op)
Check whether an action history entry represents a maskable probe.
Definition probes.c:69
#define PCMK_ACTION_STOP
Definition actions.h:75
const char * pcmk_on_fail_text(enum action_fail_response on_fail)
Get string equivalent of a failure handling type.
Definition actions.c:147
bool pcmk_is_probe(const char *task, guint interval)
Check whether an action name and interval represent a probe.
Definition probes.c:30
gboolean decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc)
Parse a transition key into its constituent parts.
Definition actions.c:426
#define PCMK_ACTION_META_DATA
Definition actions.h:56
#define PCMK_ACTION_PROMOTE
Definition actions.h:66
#define PCMK_ACTION_START
Definition actions.h:72
action_fail_response
Definition actions.h:130
@ pcmk_on_fail_ban
Definition actions.h:150
@ pcmk_on_fail_fence_node
Definition actions.h:162
@ pcmk_on_fail_ignore
Definition actions.h:144
@ pcmk_on_fail_restart_container
Definition actions.h:167
@ pcmk_on_fail_demote
Definition actions.h:178
@ pcmk_on_fail_standby_node
Definition actions.h:159
@ pcmk_on_fail_block
Definition actions.h:153
@ pcmk_on_fail_reset_remote
Definition actions.h:175
@ pcmk_on_fail_stop
Definition actions.h:156
@ pcmk_on_fail_restart
Definition actions.h:147
#define PCMK_ACTION_MIGRATE_FROM
Definition actions.h:58
@ pcmk_action_optional
Definition actions.h:210
#define PCMK_ACTION_MIGRATE_TO
Definition actions.h:59
#define PCMK_ACTION_MONITOR
Definition actions.h:60
#define PCMK_ACTION_OFF
Definition actions.h:63
#define PCMK_ACTION_DEMOTE
Definition actions.h:49
#define PCMK_ACTION_NOTIFY
Definition actions.h:62
bool pcmk_xe_is_probe(const xmlNode *xml_op)
Check whether an action history entry represents a probe.
Definition probes.c:45
#define pcmk__set_action_flags(action, flags_to_set)
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
Definition actions.c:196
#define PCMK__ACTION_POWEROFF
const char * pcmk__node_attr(const pcmk_node_t *node, const char *name, const char *target, enum pcmk__rsc_node node_type)
Definition attrs.c:118
const char * parent
Definition cib.c:27
const char * name
Definition cib.c:26
pcmk_resource_t * pe__create_clone_child(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
Definition clone.c:247
int pcmk__effective_rc(int rc)
Definition agents.c:72
#define pcmk__assert_alloc(nmemb, size)
Definition internal.h:297
Utility functions.
int pcmk_parse_interval_spec(const char *input, guint *result_ms)
Parse milliseconds from a Pacemaker interval specification.
Definition strings.c:451
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1, 2)
gboolean crm_is_true(const char *s)
Definition strings.c:488
int crm_str_to_boolean(const char *s, int *ret)
Definition strings.c:496
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition util.h:100
int pe__unpack_resource(xmlNode *xml_obj, pcmk_resource_t **rsc, pcmk_resource_t *parent, pcmk_scheduler_t *scheduler)
Definition complex.c:639
pcmk_resource_t * uber_parent(pcmk_resource_t *rsc)
Definition complex.c:1007
pcmk__cpg_host_t host
Definition cpg.c:4
enum crm_ais_msg_types type
Definition cpg.c:3
char uname[MAX_NAME]
Definition cpg.c:5
char data[0]
Definition cpg.c:10
uint32_t id
Definition cpg.c:0
A dumping ground.
#define CRMD_JOINSTATE_NACK
Definition crm.h:146
#define CRM_ATTR_IS_DC
Definition crm.h:103
#define CRM_ATTR_SITE_NAME
Definition crm.h:105
#define CRMD_JOINSTATE_DOWN
Definition crm.h:143
#define CRMD_JOINSTATE_PENDING
Definition crm.h:144
#define CRM_ATTR_KIND
Definition crm.h:101
#define CRM_ATTR_CLUSTER_NAME
Definition crm.h:104
#define CRM_ATTR_UNAME
Definition crm.h:99
#define CRM_ATTR_ID
Definition crm.h:100
#define CRMD_JOINSTATE_MEMBER
Definition crm.h:145
@ pcmk__digest_match
@ pcmk__digest_unknown
@ pcmk__fc_effective
@ pcmk__fc_default
@ pcmk__rsc_node_current
Where resource is running.
char * pcmk__epoch2str(const time_t *source, uint32_t flags)
Definition iso8601.c:2014
const char * pcmk__readable_interval(guint interval_ms)
Definition iso8601.c:2073
#define CRM_TRACE_INIT_DATA(name)
Definition logging.h:143
#define crm_log_xml_info(xml, text)
Definition logging.h:408
#define crm_info(fmt, args...)
Definition logging.h:397
#define do_crm_log(level, fmt, args...)
Log a message.
Definition logging.h:181
#define crm_warn(fmt, args...)
Definition logging.h:392
#define CRM_XS
Definition logging.h:56
#define crm_log_xml_debug(xml, text)
Definition logging.h:409
#define CRM_LOG_ASSERT(expr)
Definition logging.h:228
#define crm_notice(fmt, args...)
Definition logging.h:395
#define CRM_CHECK(expr, failure_action)
Definition logging.h:245
#define crm_debug(fmt, args...)
Definition logging.h:400
#define crm_trace(fmt, args...)
Definition logging.h:402
#define pcmk__config_warn(fmt...)
#define pcmk__config_err(fmt...)
@ pcmk__wo_ping_node
@ pcmk__wo_poweroff
@ pcmk__wo_blind
@ pcmk__wo_rdisc_enabled
@ pcmk__wo_remove_after
#define pcmk__warn_once(wo_flag, fmt...)
pcmk_scheduler_t * scheduler
#define PCMK_NODE_ATTR_STANDBY
Definition nodes.h:31
@ node_ping
Definition nodes.h:42
@ pcmk_node_variant_remote
Definition nodes.h:40
@ pcmk_node_variant_cluster
Definition nodes.h:39
#define PCMK_NODE_ATTR_TERMINATE
Definition nodes.h:32
#define PCMK_NODE_ATTR_MAINTENANCE
Definition nodes.h:30
#define PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition nvpair.c:446
int crm_element_value_int(const xmlNode *data, const char *name, int *dest)
Retrieve the integer value of an XML attribute.
Definition nvpair.c:482
int crm_element_value_ms(const xmlNode *data, const char *name, guint *dest)
Retrieve the millisecond value of an XML attribute.
Definition nvpair.c:539
int crm_element_value_epoch(const xmlNode *xml, const char *name, time_t *dest)
Retrieve the seconds-since-epoch value of an XML attribute.
Definition nvpair.c:567
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
Definition nvpair.c:301
#define PCMK_OPT_STOP_ALL_RESOURCES
Definition options.h:69
#define PCMK_META_INTERVAL
Definition options.h:91
#define PCMK_VALUE_FENCE_LEGACY
Definition options.h:224
#define PCMK_OPT_SYMMETRIC_CLUSTER
Definition options.h:72
#define PCMK_META_ON_FAIL
Definition options.h:98
#define PCMK_OPT_NODE_PENDING_TIMEOUT
Definition options.h:53
#define PCMK_OPT_STARTUP_FENCING
Definition options.h:63
#define PCMK_OPT_STOP_ORPHAN_RESOURCES
Definition options.h:71
#define PCMK_OPT_SHUTDOWN_LOCK_LIMIT
Definition options.h:61
#define PCMK_OPT_MAINTENANCE_MODE
Definition options.h:44
#define PCMK_OPT_STOP_ORPHAN_ACTIONS
Definition options.h:70
#define PCMK_OPT_NO_QUORUM_POLICY
Definition options.h:46
#define PCMK_OPT_PRIORITY_FENCING_DELAY
Definition options.h:58
#define PCMK_VALUE_OFFLINE
Definition options.h:183
#define PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS
Definition options.h:137
#define PCMK_VALUE_TRUE
Definition options.h:215
#define PCMK_META_REMOTE_CONNECT_TIMEOUT
Definition options.h:107
#define PCMK_VALUE_DEMOTE
Definition options.h:145
#define PCMK_META_REMOTE_NODE
Definition options.h:108
#define PCMK_OPT_HAVE_WATCHDOG
Definition options.h:40
#define PCMK_VALUE_FREEZE
Definition options.h:155
#define PCMK_OPT_CONCURRENT_FENCING
Definition options.h:33
#define PCMK_OPT_START_FAILURE_IS_FATAL
Definition options.h:62
#define PCMK_OPT_PLACEMENT_STRATEGY
Definition options.h:57
#define PCMK_META_IS_MANAGED
Definition options.h:92
#define PCMK_OPT_STONITH_ENABLED
Definition options.h:65
#define PCMK_OPT_ENABLE_STARTUP_PROBES
Definition options.h:38
#define PCMK_VALUE_REMOTE
Definition options.h:198
#define PCMK_META_TARGET_ROLE
Definition options.h:113
#define PCMK_OPT_CLUSTER_NAME
Definition options.h:31
#define PCMK_VALUE_IGNORE
Definition options.h:161
#define PCMK_VALUE_MEMBER
Definition options.h:169
#define PCMK_META_REMOTE_PORT
Definition options.h:109
#define PCMK_VALUE_FALSE
Definition options.h:152
#define PCMK_OPT_STONITH_TIMEOUT
Definition options.h:67
#define PCMK_META_REMOTE_ALLOW_MIGRATE
Definition options.h:106
#define PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
Definition options.h:68
#define PCMK_META_REMOTE_ADDR
Definition options.h:105
#define PCMK_OPT_STONITH_ACTION
Definition options.h:64
#define PCMK_OPT_SHUTDOWN_LOCK
Definition options.h:60
#define PCMK_VALUE_STOP
Definition options.h:209
#define PCMK_VALUE_ONLINE
Definition options.h:184
const char * pcmk__cluster_option(GHashTable *options, const char *name)
Definition options.c:1412
#define PCMK__META_CONTAINER
#define PCMK__META_MIGRATE_SOURCE
#define PCMK__OPT_REMOVE_AFTER_STOP
void pcmk__validate_cluster_options(GHashTable *options)
Definition options.c:1558
#define PCMK__META_MIGRATE_TARGET
#define PCMK__VALUE_PING
unsigned int timeout
Definition pcmk_fence.c:32
const char * action
Definition pcmk_fence.c:30
pcmk__action_result_t result
Definition pcmk_fence.c:35
const char * target
Definition pcmk_fence.c:29
void pe__unpack_node_health_scores(pcmk_scheduler_t *scheduler)
Definition pe_health.c:24
G_GNUC_INTERNAL gint pe__cmp_rsc_priority(gconstpointer a, gconstpointer b)
Definition utils.c:295
bool pe_can_fence(const pcmk_scheduler_t *scheduler, const pcmk_node_t *node)
Definition utils.c:36
pcmk_action_t * pe__clear_failcount(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *reason, pcmk_scheduler_t *scheduler)
Schedule a controller operation to clear a fail count.
Definition failcounts.c:458
pcmk_node_t * pe__copy_node(const pcmk_node_t *this_node)
Definition utils.c:89
GHashTable * pcmk__unpack_action_meta(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *action_name, guint interval_ms, const xmlNode *action_config)
Definition pe_actions.c:702
GHashTable * pe__node_list2table(const GList *list)
Definition utils.c:115
gboolean get_target_role(const pcmk_resource_t *rsc, enum rsc_role_e *role)
Definition utils.c:410
time_t get_effective_time(pcmk_scheduler_t *scheduler)
Definition utils.c:395
void pe__unpack_dataset_nvpairs(const xmlNode *xml_obj, const char *set_name, const pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pcmk_scheduler_t *scheduler)
Definition utils.c:719
GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
#define demote_action(rsc, node, optional)
Definition internal.h:230
void pe__add_param_check(const xmlNode *rsc_op, pcmk_resource_t *rsc, pcmk_node_t *node, enum pcmk__check_parameters, pcmk_scheduler_t *scheduler)
Definition remote.c:189
void destroy_ticket(gpointer data)
Definition utils.c:505
int pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b, bool same_node_default)
const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
Definition complex.c:1032
pcmk__op_digest_t * rsc_action_digest_cmp(pcmk_resource_t *rsc, const xmlNode *xml_op, pcmk_node_t *node, pcmk_scheduler_t *scheduler)
Definition pe_digest.c:394
gboolean order_actions(pcmk_action_t *lh_action, pcmk_action_t *rh_action, uint32_t flags)
Definition utils.c:457
bool pe__is_universal_clone(const pcmk_resource_t *rsc, const pcmk_scheduler_t *scheduler)
Definition clone.c:1283
void resource_location(pcmk_resource_t *rsc, const pcmk_node_t *node, int score, const char *tag, pcmk_scheduler_t *scheduler)
Definition utils.c:359
pcmk_resource_t * pe__find_bundle_replica(const pcmk_resource_t *bundle, const pcmk_node_t *node)
Definition bundle.c:1402
int pe_get_failcount(const pcmk_node_t *node, pcmk_resource_t *rsc, time_t *last_failure, uint32_t flags, const xmlNode *xml_op)
Definition failcounts.c:361
void pe__update_recheck_time(time_t recheck, pcmk_scheduler_t *scheduler, const char *reason)
Definition utils.c:694
void pe__free_digests(gpointer ptr)
Definition pe_digest.c:33
gint pe__cmp_node_name(gconstpointer a, gconstpointer b)
Definition utils.c:145
pcmk_action_t * pe_fence_op(pcmk_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pcmk_scheduler_t *scheduler)
bool pe__bundle_needs_remote_name(pcmk_resource_t *rsc)
Definition bundle.c:920
enum rsc_role_e pcmk__role_after_failure(const pcmk_resource_t *rsc, const char *action_name, enum action_fail_response on_fail, GHashTable *meta)
gint sort_op_by_callid(gconstpointer a, gconstpointer b)
void pe__set_next_role(pcmk_resource_t *rsc, enum rsc_role_e role, const char *why)
Definition complex.c:1253
pcmk_action_t * custom_action(pcmk_resource_t *rsc, char *key, const char *task, const pcmk_node_t *on_node, gboolean optional, pcmk_scheduler_t *scheduler)
Create or update an action object.
#define stop_action(rsc, node, optional)
Definition internal.h:214
void native_add_running(pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_scheduler_t *scheduler, gboolean failed)
Definition native.c:91
pcmk_ticket_t * ticket_new(const char *ticket_id, pcmk_scheduler_t *scheduler)
Definition utils.c:517
void pe__clear_resource_history(pcmk_resource_t *rsc, const pcmk_node_t *node)
bool pe__shutdown_requested(const pcmk_node_t *node)
Definition utils.c:677
xmlNode * pcmk__find_action_config(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, bool include_disabled)
Definition pe_actions.c:132
gboolean add_tag_ref(GHashTable *tags, const char *tag_name, const char *obj_ref)
Definition utils.c:627
enum action_fail_response pcmk__parse_on_fail(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, const char *value)
Definition pe_actions.c:890
bool xml_contains_remote_node(xmlNode *xml)
Definition remote.c:47
xmlNode * pe_create_remote_xml(xmlNode *parent, const char *uname, const char *container_id, const char *migrateable, const char *is_managed, const char *start_timeout, const char *server, const char *port)
Definition remote.c:125
@ pcmk_rsc_match_clone_only
Match only clones and their instances, by either clone or instance ID.
Definition resources.h:191
@ pcmk_rsc_variant_primitive
Definition resources.h:37
@ pcmk_rsc_promotable
Definition resources.h:106
@ pcmk_rsc_stop_if_failed
Definition resources.h:121
@ pcmk_rsc_migratable
Definition resources.h:157
@ pcmk_rsc_unique
Definition resources.h:100
@ pcmk_rsc_needs_fencing
Definition resources.h:175
@ pcmk_rsc_removed
Definition resources.h:85
@ pcmk_rsc_start_pending
Definition resources.h:142
@ pcmk_rsc_blocked
Definition resources.h:91
@ pcmk_rsc_removed_filler
Definition resources.h:94
@ pcmk_rsc_has_filler
Definition resources.h:169
@ pcmk_rsc_managed
Definition resources.h:88
@ pcmk_rsc_ignore_failure
Definition resources.h:160
@ pcmk_rsc_failed
Definition resources.h:133
#define CRM_ASSERT(expr)
Definition results.h:42
@ CRM_EX_ERROR
Unspecified error.
Definition results.h:256
@ CRM_EX_MAX
Ensure crm_exit_t can hold this.
Definition results.h:320
@ PCMK_OCF_INSUFFICIENT_PRIV
Insufficient privileges.
Definition results.h:185
@ PCMK_OCF_FAILED_PROMOTED
Service failed and possibly in promoted role.
Definition results.h:193
@ PCMK_OCF_RUNNING_PROMOTED
Service active and promoted.
Definition results.h:192
@ PCMK_OCF_DEGRADED_PROMOTED
Service promoted but more likely to fail soon.
Definition results.h:195
@ PCMK_OCF_UNIMPLEMENT_FEATURE
Requested action not implemented.
Definition results.h:184
@ PCMK_OCF_NOT_CONFIGURED
Parameter invalid (inherently)
Definition results.h:187
@ PCMK_OCF_DEGRADED
Service active but more likely to fail soon.
Definition results.h:194
@ PCMK_OCF_NOT_INSTALLED
Dependencies not available locally.
Definition results.h:186
@ PCMK_OCF_UNKNOWN_ERROR
Unspecified error.
Definition results.h:181
@ PCMK_OCF_INVALID_PARAM
Parameter invalid (in local context)
Definition results.h:183
@ PCMK_OCF_NOT_RUNNING
Service safely stopped.
Definition results.h:190
@ PCMK_OCF_OK
Success.
Definition results.h:178
@ PCMK_OCF_UNKNOWN
Action is pending.
Definition results.h:203
@ pcmk_rc_ok
Definition results.h:162
@ pcmk_rc_undetermined
Definition results.h:135
@ pcmk_rc_unpack_error
Definition results.h:125
#define pcmk_ok
Definition results.h:69
const char * crm_exit_str(crm_exit_t exit_code)
Definition results.c:640
@ PCMK_EXEC_CANCELLED
Action was cancelled.
Definition results.h:334
@ PCMK_EXEC_NO_SECRETS
Necessary CIB secrets are unavailable.
Definition results.h:344
@ PCMK_EXEC_ERROR_FATAL
Execution failed, do not retry anywhere.
Definition results.h:339
@ PCMK_EXEC_NOT_INSTALLED
Agent or dependency not available locally.
Definition results.h:340
@ PCMK_EXEC_INVALID
Action cannot be attempted (e.g. shutdown)
Definition results.h:342
@ PCMK_EXEC_DONE
Action completed, result is known.
Definition results.h:333
@ PCMK_EXEC_ERROR
Execution failed, may be retried.
Definition results.h:337
@ PCMK_EXEC_NOT_SUPPORTED
Agent does not implement requested action.
Definition results.h:336
@ PCMK_EXEC_TIMEOUT
Action did not complete in time.
Definition results.h:335
@ PCMK_EXEC_PENDING
Action is in progress.
Definition results.h:332
@ PCMK_EXEC_UNKNOWN
Used only to initialize variables.
Definition results.h:331
@ PCMK_EXEC_ERROR_HARD
Execution failed, do not retry on node.
Definition results.h:338
@ PCMK_EXEC_MAX
Maximum value for this enum.
Definition results.h:347
@ PCMK_EXEC_NO_FENCE_DEVICE
No fence device is configured for target.
Definition results.h:343
@ PCMK_EXEC_NOT_CONNECTED
No connection to executor.
Definition results.h:341
const char * pcmk_role_text(enum rsc_role_e role)
Get readable description of a resource role.
Definition roles.c:23
rsc_role_e
Definition roles.h:34
@ pcmk_role_started
Started.
Definition roles.h:37
@ pcmk_role_unknown
Resource role is unknown.
Definition roles.h:35
@ pcmk_role_unpromoted
Unpromoted.
Definition roles.h:38
@ pcmk_role_promoted
Promoted.
Definition roles.h:39
@ pcmk_role_stopped
Stopped.
Definition roles.h:36
#define pcmk__set_rsc_flags(resource, flags_to_set)
#define pcmk__clear_rsc_flags(resource, flags_to_clear)
@ pcmk_no_quorum_freeze
Definition scheduler.h:41
@ pcmk_no_quorum_stop
Definition scheduler.h:42
@ pcmk_no_quorum_ignore
Definition scheduler.h:43
@ pcmk_no_quorum_demote
Definition scheduler.h:45
@ pcmk_no_quorum_fence
Definition scheduler.h:44
pcmk_node_t * pcmk_find_node(const pcmk_scheduler_t *scheduler, const char *node_name)
Find a node by name in scheduler data.
Definition scheduler.c:103
@ pcmk_sched_stop_removed_resources
Definition scheduler.h:108
@ pcmk_sched_in_maintenance
Definition scheduler.h:86
@ pcmk_sched_symmetric_cluster
Definition scheduler.h:83
@ pcmk_sched_fencing_enabled
Definition scheduler.h:89
@ pcmk_sched_probe_resources
Definition scheduler.h:142
@ pcmk_sched_have_remote_nodes
Definition scheduler.h:148
@ pcmk_sched_have_fencing
Definition scheduler.h:96
@ pcmk_sched_shutdown_lock
Definition scheduler.h:136
@ pcmk_sched_location_only
Definition scheduler.h:158
@ pcmk_sched_quorate
Definition scheduler.h:80
@ pcmk_sched_concurrent_fencing
Definition scheduler.h:102
@ pcmk_sched_start_failure_fatal
Definition scheduler.h:124
@ pcmk_sched_enable_unfencing
Definition scheduler.h:99
@ pcmk_sched_remove_after_stop
Definition scheduler.h:127
@ pcmk_sched_cancel_removed_actions
Definition scheduler.h:114
@ pcmk_sched_stop_all
Definition scheduler.h:117
@ pcmk_sched_startup_fencing
Definition scheduler.h:130
#define pcmk__rsc_info(rsc, fmt, args...)
#define pcmk__rsc_trace(rsc, fmt, args...)
@ pcmk__check_last_failure
#define pcmk__clear_scheduler_flags(scheduler, flags_to_clear)
#define pcmk__sched_warn(fmt...)
#define pcmk__rsc_debug(rsc, fmt, args...)
#define pcmk__sched_err(fmt...)
#define pcmk__set_scheduler_flags(scheduler, flags_to_set)
int char2score(const char *score)
Get the integer value of a score string.
Definition scores.c:36
#define PCMK_SCORE_INFINITY
Integer score to use to represent "infinity".
Definition scores.h:24
Services API.
pcmk_resource_t * pe_find_resource(GList *rsc_list, const char *id_rh)
Definition status.c:426
pcmk_node_t * pe_find_node_any(const GList *node_list, const char *id, const char *node_name)
Find a node by name or ID in a list of nodes.
Definition status.c:461
void pcmk__insert_dup(GHashTable *table, const char *name, const char *value)
Definition strings.c:701
int pcmk__scan_min_int(const char *text, int *result, int minimum)
Definition strings.c:127
GHashTable * pcmk__strkey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
Definition strings.c:683
int pcmk__scan_ll(const char *text, long long *result, long long default_value)
Definition strings.c:97
void pcmk__str_update(char **str, const char *value)
Definition strings.c:1277
bool pcmk__strcase_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
Definition strings.c:1026
@ pcmk__str_none
@ pcmk__str_null_matches
@ pcmk__str_casei
bool pcmk__ends_with(const char *s, const char *match)
Definition strings.c:608
bool pcmk__str_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
Definition strings.c:1050
void pcmk__g_strcat(GString *buffer,...) G_GNUC_NULL_TERMINATED
Definition strings.c:1296
#define pcmk__str_copy(str)
enum pcmk__digest_result rc
int weight
Definition nodes.h:162
gboolean fixed
Definition nodes.h:163
struct pe_node_shared_s * details
Definition nodes.h:167
GHashTable * attrs
Definition nodes.h:142
gboolean shutdown
Definition nodes.h:97
GHashTable * digest_cache
Definition nodes.h:144
gboolean expected_up
Definition nodes.h:99
const char * id
Definition nodes.h:72
gboolean online
Definition nodes.h:80
gboolean standby_onfail
Definition nodes.h:83
const char * uname
Definition nodes.h:73
gboolean standby
Definition nodes.h:82
GHashTable * utilization
Definition nodes.h:143
gboolean unpacked
Definition nodes.h:126
pcmk_scheduler_t * data_set
Definition nodes.h:153
gboolean remote_maintenance
Definition nodes.h:124
gboolean is_dc
Definition nodes.h:100
gboolean unclean
Definition nodes.h:91
gboolean remote_requires_reset
Definition nodes.h:112
pcmk_resource_t * remote_rsc
Definition nodes.h:135
gboolean maintenance
Definition nodes.h:104
gboolean rsc_discovery_enabled
Definition nodes.h:106
enum node_type type
Definition nodes.h:74
gboolean pending
Definition nodes.h:87
gboolean remote_was_fenced
Definition nodes.h:118
GList * running_rsc
Definition nodes.h:139
gboolean unseen
Definition nodes.h:93
GList * running_on
Definition resources.h:456
enum pe_obj_types variant
Definition resources.h:410
pcmk_node_t * partial_migration_target
Definition resources.h:450
GHashTable * meta
Definition resources.h:467
GList * children
Definition resources.h:471
pcmk_scheduler_t * cluster
Definition resources.h:408
pcmk_node_t * partial_migration_source
Definition resources.h:453
pcmk_resource_t * container
Definition resources.h:476
pcmk_rsc_methods_t * fns
Definition resources.h:412
GHashTable * known_on
Definition resources.h:459
char * clone_name
Definition resources.h:397
gboolean is_remote_node
Definition resources.h:431
GHashTable * allowed_nodes
Definition resources.h:462
GList * dangling_migrations
Definition resources.h:474
pcmk_node_t * lock_node
Definition resources.h:481
unsigned long long flags
Definition resources.h:428
pcmk_node_t * pending_node
Definition resources.h:480
guint remote_reconnect_ms
Definition resources.h:423
GList * fillers
Definition resources.h:477
enum rsc_role_e next_role
Definition resources.h:465
enum rsc_role_e role
Definition resources.h:464
pcmk_resource_t * parent
Definition resources.h:409
time_t lock_time
Definition resources.h:483
GHashTable * node_hash
Definition common.h:46
char * id
Definition tags.h:30
GList * refs
Definition tags.h:31
GHashTable * state
Definition tickets.h:35
char * id
Definition tickets.h:31
gboolean standby
Definition tickets.h:34
gboolean granted
Definition tickets.h:32
time_t last_granted
Definition tickets.h:33
guint node_pending_timeout
Definition scheduler.h:266
const char * stonith_action
Definition scheduler.h:205
GHashTable * tags
Definition scheduler.h:253
const char * placement_strategy
Definition scheduler.h:206
GHashTable * config_hash
Definition scheduler.h:219
GHashTable * template_rsc_sets
Definition scheduler.h:248
xmlNode * input
Definition scheduler.h:196
GList * resources
Definition scheduler.h:231
unsigned long long flags
Definition scheduler.h:211
pcmk_node_t * dc_node
Definition scheduler.h:203
enum pe_quorum_policy no_quorum_policy
Definition scheduler.h:217
GList * stop_needed
Definition scheduler.h:257
GHashTable * tickets
Definition scheduler.h:222
int priority_fencing_delay
Definition scheduler.h:261
crm_time_t * now
Definition scheduler.h:198
const char * localhost
Definition scheduler.h:251
pcmk_resource_t *(* find_rsc)(pcmk_resource_t *rsc, const char *search, const pcmk_node_t *node, int flags)
Definition resources.h:276
pcmk_node_t *(* location)(const pcmk_resource_t *rsc, GList **list, int current)
Definition resources.h:328
pcmk_node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pcmk_scheduler_t *scheduler)
Definition unpack.c:455
#define set_config_flag(scheduler, option, flag)
Definition unpack.c:51
#define XPATH_ENABLE_UNFENCING
Definition unpack.c:193
void calculate_active_ops(const GList *sorted_op_list, int *start_index, int *stop_index)
Definition unpack.c:2588
void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
Definition unpack.c:112
const char * pe_base_name_end(const char *id)
Definition unpack.c:1879
gboolean unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
Definition unpack.c:1371
#define SUB_XPATH_LRM_RESOURCE
Definition unpack.c:2893
GList * find_operations(const char *rsc, const char *node, gboolean active_filter, pcmk_scheduler_t *scheduler)
Definition unpack.c:5095
gboolean unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
Definition unpack.c:703
char * clone_zero(const char *last_rsc_id)
Definition unpack.c:1941
#define XPATH_NODE_STATE
Definition unpack.c:2891
#define SUB_XPATH_LRM_RSC_OP
Definition unpack.c:2896
gboolean unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
Definition unpack.c:847
gboolean unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
Definition unpack.c:214
char * clone_strip(const char *last_rsc_id)
Definition unpack.c:1919
gboolean unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
Definition unpack.c:623
int pe__target_rc_from_xml(const xmlNode *xml_op)
Definition unpack.c:4349
gboolean unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
Definition unpack.c:913
Wrappers for and extensions to libxml2.
xmlNode * get_xpath_object(const char *xpath, xmlNode *xml_obj, int error_level)
Definition xpath.c:189
void freeXpathObject(xmlXPathObjectPtr xpathObj)
Definition xpath.c:39
xmlXPathObjectPtr xpath_search(const xmlNode *xml_top, const char *path)
Definition xpath.c:139
xmlNode * pcmk__xml_copy(xmlNode *parent, xmlNode *src)
Definition xml.c:883
xmlNode * pcmk__xe_first_child(const xmlNode *parent, const char *node_name, const char *attr_n, const char *attr_v)
Definition xml.c:440
xmlNode * pcmk__xe_next_same(const xmlNode *node)
Definition xml.c:2108
@ pcmk__xaf_none
Flag has no effect.
int pcmk__xe_copy_attrs(xmlNode *target, const xmlNode *src, uint32_t flags)
Definition xml.c:584
xmlNode * pcmk__xe_create(xmlNode *parent, const char *name)
Definition xml.c:720
#define PCMK_XE_UTILIZATION
Definition xml_names.h:212
#define PCMK_XE_STATUS
Definition xml_names.h:199
#define PCMK_XA_SCORE
Definition xml_names.h:391
#define PCMK_XE_NODE
Definition xml_names.h:133
#define PCMK_XE_GROUP
Definition xml_names.h:116
#define PCMK_XA_OPERATION
Definition xml_names.h:344
#define PCMK_XA_ID
Definition xml_names.h:296
#define PCMK_XA_CRMD
Definition xml_names.h:251
#define PCMK_XE_TICKETS
Definition xml_names.h:208
#define PCMK_XE_INSTANCE_ATTRIBUTES
Definition xml_names.h:119
#define PCMK_XA_LAST_RC_CHANGE
Definition xml_names.h:311
#define PCMK_XE_META_ATTRIBUTES
Definition xml_names.h:127
#define PCMK_XA_VALUE
Definition xml_names.h:437
#define PCMK_XA_LAST_GRANTED
Definition xml_names.h:310
#define PCMK_XA_EXIT_REASON
Definition xml_names.h:269
#define PCMK_XE_PRIMITIVE
Definition xml_names.h:160
#define PCMK_XE_CLUSTER_PROPERTY_SET
Definition xml_names.h:84
#define PCMK_XA_TYPE
Definition xml_names.h:425
#define PCMK_XA_RESOURCE_DISCOVERY
Definition xml_names.h:379
#define PCMK_XE_TAG
Definition xml_names.h:203
#define PCMK_XA_STANDBY
Definition xml_names.h:401
#define PCMK_XA_EXPECTED
Definition xml_names.h:273
#define PCMK_XE_OBJ_REF
Definition xml_names.h:142
#define PCMK_XE_TEMPLATE
Definition xml_names.h:206
#define PCMK_XA_NO_QUORUM_PANIC
Definition xml_names.h:328
#define PCMK_XA_UNAME
Definition xml_names.h:426
#define PCMK_XA_NAME
Definition xml_names.h:325
#define PCMK_XA_RESOURCE
Definition xml_names.h:377
#define PCMK__XE_LRM_RSC_OP
#define PCMK__XE_LRM_RESOURCE
#define PCMK__XA_NODE_FENCED
#define PCMK__XE_TRANSIENT_ATTRIBUTES
#define PCMK__XE_LRM_RESOURCES
#define PCMK__XA_CALL_ID
#define PCMK__XA_RSC_ID
#define PCMK__XA_OP_RESTART_DIGEST
#define PCMK__XA_JOIN
#define PCMK__XA_IN_CCM
#define PCMK__XA_OP_STATUS
#define PCMK__XA_TRANSITION_KEY
#define PCMK__XA_GRANTED
#define PCMK__XE_NODE_STATE
#define PCMK__XE_LRM
#define PCMK__XA_NODE_IN_MAINTENANCE
#define PCMK__XA_RC_CODE
#define PCMK__XE_TICKET_STATE