30 #define set_config_flag(data_set, option, flag) do { \
31 const char *tmp = pe_pref(data_set->config_hash, option); \
33 if(crm_is_true(tmp)) { \
34 set_bit(data_set->flags, flag); \
36 clear_bit(data_set->flags, flag); \
42 xmlNode **last_failure,
46 static void add_node_attrs(xmlNode *attrs,
pe_node_t *node,
bool overwrite,
54 is_dangling_guest_node(
node_t *node)
89 "(otherwise would because %s): "
90 "its guest resource %s is unmanaged",
93 crm_warn(
"Guest node %s will be fenced "
94 "(by recovering its guest resource %s): %s",
106 }
else if (is_dangling_guest_node(node)) {
107 crm_info(
"Cleaning up dangling connection for guest node %s: "
108 "fencing was already done because %s, "
109 "and guest resource no longer exists",
118 "(otherwise would because %s): connection is unmanaged",
124 pe_can_fence(data_set, node)?
"will be fenced" :
"is unclean",
131 crm_trace(
"Cluster node %s %s because %s",
133 pe_can_fence(data_set, node)?
"would also be fenced" :
"also is unclean",
139 pe_can_fence(data_set, node)?
"will be fenced" :
"is unclean",
/*
 * XPath step (built by string-literal concatenation) that selects an
 * XML_CIB_TAG_NVPAIR element whose XML_NVPAIR_ATTR_NAME attribute equals
 * either XML_RSC_ATTR_PROVIDES or XML_RSC_ATTR_REQUIRES, and whose
 * XML_NVPAIR_ATTR_VALUE attribute equals the literal 'unfencing'.
 *
 * The expansion has no leading "/", so it is meant to be appended to a
 * parent path by the user of the macro.
 *
 * NOTE(review): the first comparison's closing quote is followed directly by
 * "or" with no whitespace in between -- confirm the XPath parser in use
 * tokenizes this as intended.
 */
149 #define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \
150 "[(@" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_PROVIDES "'" \
151 "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \
152 "and @" XML_NVPAIR_ATTR_VALUE "='unfencing']"
/*
 * XPath expression formed as the union ("|") of two absolute location paths,
 * each ending in XPATH_UNFENCING_NVPAIR:
 *
 *   1. XML_TAG_CIB / XML_CIB_TAG_CONFIGURATION / XML_CIB_TAG_RESOURCES,
 *      then any descendant ("//") XML_TAG_META_SETS element, then the nvpair.
 *   2. XML_TAG_CIB / XML_CIB_TAG_CONFIGURATION / XML_CIB_TAG_RSCCONFIG,
 *      then a direct child XML_TAG_META_SETS element, then the nvpair.
 *
 * A non-empty result therefore means at least one matching "unfencing"
 * nvpair exists under one of those two locations.
 */
155 #define XPATH_ENABLE_UNFENCING \
156 "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \
157 "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \
158 "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \
159 "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR
162 void set_if_xpath(
unsigned long long flag,
const char *xpath,
165 xmlXPathObjectPtr result = NULL;
167 if (is_not_set(data_set->
flags, flag)) {
169 if (result && (numXpathResults(result) > 0)) {
179 const char *value = NULL;
180 GHashTable *config_hash = crm_str_table_new();
191 crm_info(
"Startup probes: disabled (dangerous)");
196 crm_notice(
"Watchdog will be used via SBD if fencing is required "
197 "and stonith-watchdog-timeout is nonzero");
211 crm_debug(
"STONITH of failed nodes is %s",
217 "Support for stonith-action of 'poweroff' is deprecated "
218 "and will be removed in a future release (use 'off' instead)");
228 crm_debug(
"Stop all active resources: %s",
233 crm_debug(
"Cluster is symmetric" " - resources can run anywhere by default");
253 crm_notice(
"Resetting no-quorum-policy to 'stop': cluster has never had quorum");
257 crm_config_err(
"Resetting no-quorum-policy to 'stop': stonith is not configured");
267 crm_debug(
"On loss of quorum: Freeze resources");
270 crm_debug(
"On loss of quorum: Stop ALL resources");
273 crm_notice(
"On loss of quorum: Fence all remaining nodes");
285 crm_trace(
"Orphan resource actions are %s",
289 crm_trace(
"Stopped resources are removed from the status section: %s",
298 is_set(data_set->
flags,
305 crm_trace(
"Unseen nodes will be fenced");
314 crm_debug(
"Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s",
326 destroy_digest_cache(gpointer ptr)
334 free(
data->digest_all_calc);
335 free(
data->digest_restart_calc);
336 free(
data->digest_secure_calc);
349 " - this is rarely intended",
uname);
352 new_node = calloc(1,
sizeof(
node_t));
353 if (new_node == NULL) {
358 new_node->
fixed = FALSE;
361 if (new_node->
details == NULL) {
396 destroy_digest_cache);
409 if (
data->name_check == NULL) {
411 for (xml_rsc = __xml_first_child_element(parent); xml_rsc != NULL;
412 xml_rsc = __xml_next_element(xml_rsc)) {
414 const char *
id =
ID(xml_rsc);
417 g_hash_table_insert(
data->name_check, (
char *)
id, (
char *)
id);
420 if (g_hash_table_lookup(
data->name_check, remote_name)) {
425 crm_err(
"Invalid remote-node name, a resource called '%s' already exists.", remote_name);
436 xmlNode *attr_set = NULL;
437 xmlNode *attr = NULL;
439 const char *container_id =
ID(xml_obj);
440 const char *remote_name = NULL;
441 const char *remote_server = NULL;
442 const char *remote_port = NULL;
443 const char *connect_timeout =
"60s";
444 const char *remote_allow_migrate=NULL;
445 const char *is_managed = NULL;
447 for (attr_set = __xml_first_child_element(xml_obj); attr_set != NULL;
448 attr_set = __xml_next_element(attr_set)) {
453 for (attr = __xml_first_child_element(attr_set); attr != NULL;
454 attr = __xml_next_element(attr)) {
461 remote_server = value;
464 }
else if (
safe_str_eq(name,
"remote-connect-timeout")) {
465 connect_timeout = value;
466 }
else if (
safe_str_eq(name,
"remote-allow-migrate")) {
467 remote_allow_migrate=value;
474 if (remote_name == NULL) {
483 remote_allow_migrate, is_managed,
484 connect_timeout, remote_server, remote_port);
516 xmlNode *xml_obj = NULL;
518 const char *
id = NULL;
519 const char *
uname = NULL;
520 const char *
type = NULL;
521 const char *score = NULL;
523 for (xml_obj = __xml_first_child_element(xml_nodes); xml_obj != NULL;
524 xml_obj = __xml_next_element(xml_obj)) {
541 if (new_node == NULL) {
551 handle_startup_fencing(data_set, new_node);
553 add_node_attrs(xml_obj, new_node, FALSE, data_set);
563 crm_info(
"Creating a fake local node");
574 const char *container_id = NULL;
579 for (; gIter != NULL; gIter = gIter->next) {
582 setup_container(child_rsc, data_set);
595 pe_rsc_trace(rsc,
"Resource %s's container is %s", rsc->
id, container_id);
597 pe_err(
"Resource %s: Unknown resource container (%s)", rsc->
id, container_id);
605 xmlNode *xml_obj = NULL;
610 for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL;
611 xml_obj = __xml_next_element(xml_obj)) {
613 const char *new_node_id = NULL;
619 new_node_id =
ID(xml_obj);
623 crm_trace(
"Found remote node %s defined by resource %s",
624 new_node_id,
ID(xml_obj));
639 new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set);
641 crm_trace(
"Found guest node %s in resource %s",
642 new_node_id,
ID(xml_obj));
653 xmlNode *xml_obj2 = NULL;
654 for (xml_obj2 = __xml_first_child_element(xml_obj); xml_obj2 != NULL;
655 xml_obj2 = __xml_next_element(xml_obj2)) {
657 new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set);
660 crm_trace(
"Found guest node %s in resource %s inside group %s",
661 new_node_id,
ID(xml_obj2),
ID(xml_obj));
682 node_t *remote_node = NULL;
696 pe_rsc_trace(new_rsc,
"Linking remote connection resource %s to node %s",
704 handle_startup_fencing(data_set, remote_node);
711 strdup(
"container"));
716 destroy_tag(gpointer
data)
722 g_list_free_full(tag->
refs, free);
742 xmlNode *xml_obj = NULL;
749 for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL;
750 xml_obj = __xml_next_element(xml_obj)) {
755 const char *template_id =
ID(xml_obj);
758 template_id, NULL, NULL) == FALSE) {
765 crm_trace(
"Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj),
ID(xml_obj));
773 if (new_rsc != NULL && new_rsc->
fns != NULL) {
779 for (gIter = data_set->
resources; gIter != NULL; gIter = gIter->next) {
782 setup_container(rsc, data_set);
783 link_rsc2remotenode(data_set, rsc);
793 crm_config_err(
"Resource start-up disabled since no STONITH resources have been defined");
794 crm_config_err(
"Either configure some or disable STONITH with the stonith-enabled option");
795 crm_config_err(
"NOTE: Clusters with shared data need STONITH to ensure data integrity");
804 xmlNode *xml_tag = NULL;
809 for (xml_tag = __xml_first_child_element(xml_tags); xml_tag != NULL;
810 xml_tag = __xml_next_element(xml_tag)) {
812 xmlNode *xml_obj_ref = NULL;
813 const char *tag_id =
ID(xml_tag);
819 if (tag_id == NULL) {
825 for (xml_obj_ref = __xml_first_child_element(xml_tag); xml_obj_ref != NULL;
826 xml_obj_ref = __xml_next_element(xml_obj_ref)) {
828 const char *obj_ref =
ID(xml_obj_ref);
834 if (obj_ref == NULL) {
835 crm_config_err(
"Failed unpacking %s for tag %s: %s should be specified",
836 crm_element_name(xml_obj_ref), tag_id,
XML_ATTR_ID);
854 const char *ticket_id = NULL;
855 const char *granted = NULL;
856 const char *last_granted = NULL;
857 const char *standby = NULL;
858 xmlAttrPtr xIter = NULL;
862 ticket_id =
ID(xml_ticket);
863 if (ticket_id == NULL || strlen(ticket_id) == 0) {
867 crm_trace(
"Processing ticket state for %s", ticket_id);
869 ticket = g_hash_table_lookup(data_set->
tickets, ticket_id);
870 if (ticket == NULL) {
872 if (ticket == NULL) {
877 for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
878 const char *prop_name = (
const char *)xIter->name;
884 g_hash_table_replace(ticket->
state, strdup(prop_name), strdup(prop_value));
887 granted = g_hash_table_lookup(ticket->
state,
"granted");
893 crm_info(
"We do not have ticket '%s'", ticket->
id);
896 last_granted = g_hash_table_lookup(ticket->
state,
"last-granted");
901 standby = g_hash_table_lookup(ticket->
state,
"standby");
905 crm_info(
"Granted ticket '%s' is in standby-mode", ticket->
id);
911 crm_trace(
"Done with ticket state for %s", ticket_id);
919 xmlNode *xml_obj = NULL;
921 for (xml_obj = __xml_first_child_element(xml_tickets); xml_obj != NULL;
922 xml_obj = __xml_next_element(xml_obj)) {
927 unpack_ticket_state(xml_obj, data_set);
936 const char *resource_discovery_enabled = NULL;
937 xmlNode *attrs = NULL;
958 add_node_attrs(attrs, this_node, TRUE, data_set);
980 if (resource_discovery_enabled && !
crm_is_true(resource_discovery_enabled)) {
983 crm_warn(
"Ignoring %s attribute on remote node %s because stonith is disabled",
1000 bool changed =
false;
1001 xmlNode *lrm_rsc = NULL;
1003 for (xmlNode *state = __xml_first_child_element(status); state != NULL;
1004 state = __xml_next_element(state)) {
1006 const char *
id = NULL;
1007 const char *
uname = NULL;
1008 node_t *this_node = NULL;
1009 bool process = FALSE;
1019 if (this_node == NULL) {
1020 crm_info(
"Node %s is unknown",
id);
1024 crm_info(
"Node %s is already processed",
id);
1039 }
else if(rsc == NULL) {
1058 determine_remote_online_status(data_set, this_node);
1059 unpack_handle_remote_attrs(this_node, state, data_set);
1071 crm_trace(
"Processing lrm resource entries on %shealthy%s node: %s",
1092 const char *
id = NULL;
1093 const char *
uname = NULL;
1095 xmlNode *state = NULL;
1096 node_t *this_node = NULL;
1100 if (data_set->
tickets == NULL) {
1105 for (state = __xml_first_child_element(status); state != NULL;
1106 state = __xml_next_element(state)) {
1109 unpack_tickets_state((xmlNode *) state, data_set);
1112 xmlNode *attrs = NULL;
1113 const char *resource_discovery_enabled = NULL;
1119 if (
uname == NULL) {
1123 }
else if (this_node == NULL) {
1145 add_node_attrs(attrs, this_node, TRUE, data_set);
1158 if (resource_discovery_enabled && !
crm_is_true(resource_discovery_enabled)) {
1159 crm_warn(
"ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes",
1172 pe_fence_node(data_set, this_node,
"cluster does not have quorum");
1178 while(unpack_node_loop(status, FALSE, data_set)) {
1189 for (GList *item = data_set->
stop_needed; item; item = item->next) {
1191 pe_node_t *node = pe__current_node(container);
1201 for (
GListPtr gIter = data_set->
nodes; gIter != NULL; gIter = gIter->next) {
1202 node_t *this_node = gIter->data;
1204 if (this_node == NULL) {
1211 determine_remote_online_status(data_set, this_node);
1218 determine_online_status_no_fencing(
pe_working_set_t * data_set, xmlNode * node_state,
1221 gboolean online = FALSE;
1234 crm_debug(
"Node is not ready to run resources: %s", join);
1239 crm_trace(
"\tis_peer=%s, join=%s, expected=%s",
1244 pe_fence_node(data_set, this_node,
"peer is unexpectedly down");
1245 crm_info(
"\tin_cluster=%s, is_peer=%s, join=%s, expected=%s",
1252 determine_online_status_fencing(
pe_working_set_t * data_set, xmlNode * node_state,
1255 gboolean online = FALSE;
1256 gboolean do_terminate = FALSE;
1257 bool crmd_online = FALSE;
1272 do_terminate = TRUE;
1274 }
else if (terminate != NULL && strlen(terminate) > 0) {
1276 char t = terminate[0];
1278 if (t !=
'0' && isdigit(t)) {
1279 do_terminate = TRUE;
1283 crm_trace(
"%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d",
1289 if (exp_state == NULL) {
1297 online = crmd_online;
1299 }
else if (in_cluster == NULL) {
1300 pe_fence_node(data_set, this_node,
"peer has not been seen by the cluster");
1303 pe_fence_node(data_set, this_node,
"peer failed the pacemaker membership criteria");
1317 &&
crm_is_true(in_cluster) == FALSE && !crmd_online) {
1322 pe_fence_node(data_set, this_node,
"peer is no longer part of the cluster");
1324 }
else if (!crmd_online) {
1325 pe_fence_node(data_set, this_node,
"peer process is no longer available");
1328 }
else if (do_terminate) {
1329 pe_fence_node(data_set, this_node,
"termination was requested");
1341 pe_fence_node(data_set, this_node,
"peer was in an unknown state");
1342 crm_warn(
"%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d",
1363 goto remote_online_done;
1368 if (container && (g_list_length(rsc->
running_on) == 1)) {
1374 crm_trace(
"%s node %s presumed ONLINE because connection resource is started",
1375 (container?
"Guest" :
"Remote"), this_node->
details->
id);
1381 crm_trace(
"%s node %s shutting down because connection resource is stopping",
1382 (container?
"Guest" :
"Remote"), this_node->
details->
id);
1388 crm_trace(
"Guest node %s UNCLEAN because guest resource failed",
1394 crm_trace(
"%s node %s OFFLINE because connection resource failed",
1395 (container?
"Guest" :
"Remote"), this_node->
details->
id);
1401 crm_trace(
"%s node %s OFFLINE because its resource is stopped",
1402 (container?
"Guest" :
"Remote"), this_node->
details->
id);
1406 }
else if (
host && (
host->details->online == FALSE)
1407 &&
host->details->unclean) {
1408 crm_trace(
"Guest node %s UNCLEAN because host is unclean",
1423 gboolean online = FALSE;
1426 if (this_node == NULL) {
1449 online = determine_online_status_no_fencing(data_set, node_state, this_node);
1452 online = determine_online_status_fencing(data_set, node_state, this_node);
1460 this_node->
fixed = TRUE;
1466 this_node->
fixed = TRUE;
1501 if (!crm_strlen_zero(
id)) {
1502 const char *end =
id + strlen(
id) - 1;
1504 for (
const char *s = end; s >
id; --s) {
1518 return (s == end)? s : (s - 1);
1542 char *basename = NULL;
1545 basename =
strndup(last_rsc_id, end - last_rsc_id + 1);
1564 size_t base_name_len = end - last_rsc_id + 1;
1568 zero = calloc(base_name_len + 3,
sizeof(
char));
1570 memcpy(zero, last_rsc_id, base_name_len);
1571 zero[base_name_len] =
':';
1572 zero[base_name_len + 1] =
'0';
1577 create_fake_resource(
const char *rsc_id, xmlNode * rsc_entry,
pe_working_set_t * data_set)
1593 crm_debug(
"Detected orphaned remote node %s", rsc_id);
1598 link_rsc2remotenode(data_set, rsc);
1601 crm_trace(
"Setting node %s as shutting down due to orphaned connection resource", rsc_id);
1608 crm_trace(
"Detected orphaned container filler %s", rsc_id);
1621 create_anonymous_orphan(
pe_resource_t *parent,
const char *rsc_id,
1629 pe_rsc_debug(parent,
"Created orphan %s for %s: %s on %s",
1655 gboolean skip_inactive = FALSE;
1663 for (rIter = parent->
children; rsc == NULL && rIter; rIter = rIter->next) {
1708 crm_notice(
"Active (now-)anonymous clone %s has "
1709 "multiple (orphan) instance histories on %s",
1711 skip_inactive = TRUE;
1718 g_list_free(locations);
1722 if (!skip_inactive && !inactive_instance
1725 inactive_instance = parent->
fns->
find_rsc(child, rsc_id, NULL,
1731 if (inactive_instance && inactive_instance->
pending_node
1733 inactive_instance = NULL;
1739 if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
1740 pe_rsc_trace(parent,
"Resource %s, empty slot", inactive_instance->
id);
1741 rsc = inactive_instance;
1765 rsc = create_anonymous_orphan(parent, rsc_id, node, data_set);
1773 xmlNode * rsc_entry)
1792 crm_trace(
"%s found as %s (%s)", rsc_id, clone0_id, parent->
id);
1794 crm_trace(
"%s is not known as %s either (orphan)",
1800 crm_trace(
"Resource history for %s is orphaned because it is no longer primitive",
1808 if (pe_rsc_is_anon_clone(parent)) {
1810 if (pe_rsc_is_bundled(parent)) {
1815 rsc = find_anonymous_clone(data_set, node, parent, base);
1826 pe_rsc_debug(rsc,
"Internally renamed %s on %s to %s%s",
1840 rsc = create_fake_resource(rsc_id, rsc_entry, data_set);
1859 char *reason = NULL;
1862 pe_rsc_trace(rsc,
"Resource %s is %s on %s: on_fail=%s",
1890 gboolean should_fence = FALSE;
1901 should_fence = TRUE;
1915 " revoked if remote connection can "
1916 "be re-established elsewhere)",
1919 should_fence = TRUE;
1923 if (reason == NULL) {
1986 if (rsc->
container && pe_rsc_is_bundled(rsc)) {
2016 "remote connection is unrecoverable");
2051 " %s must be stopped manually on %s",
2074 for (; gIter != NULL; gIter = gIter->next) {
2080 g_list_free(possible_matches);
2087 int start_index,
int stop_index,
2091 const char *task = NULL;
2092 const char *status = NULL;
2096 pe_rsc_trace(rsc,
"%s: Start index %d, stop index = %d", rsc->
id, start_index, stop_index);
2098 for (; gIter != NULL; gIter = gIter->next) {
2099 xmlNode *rsc_op = (xmlNode *) gIter->data;
2101 guint interval_ms = 0;
2103 const char *
id =
ID(rsc_op);
2104 const char *interval_ms_s = NULL;
2113 }
else if (start_index < stop_index && counter <= stop_index) {
2117 }
else if (counter < start_index) {
2124 if (interval_ms == 0) {
2146 int implied_monitor_start = -1;
2147 int implied_clone_start = -1;
2148 const char *task = NULL;
2149 const char *status = NULL;
2155 for (; gIter != NULL; gIter = gIter->next) {
2156 xmlNode *rsc_op = (xmlNode *) gIter->data;
2165 *stop_index = counter;
2168 *start_index = counter;
2174 implied_monitor_start = counter;
2177 implied_clone_start = counter;
2181 if (*start_index == -1) {
2182 if (implied_clone_start != -1) {
2183 *start_index = implied_clone_start;
2184 }
else if (implied_monitor_start != -1) {
2185 *start_index = implied_monitor_start;
2194 int stop_index = -1;
2195 int start_index = -1;
2198 const char *task = NULL;
2205 xmlNode *migrate_op = NULL;
2206 xmlNode *rsc_op = NULL;
2207 xmlNode *last_failure = NULL;
2213 crm_element_name(rsc_entry), rsc_id, node->
details->
uname);
2217 sorted_op_list = NULL;
2219 for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL;
2220 rsc_op = __xml_next_element(rsc_op)) {
2222 op_list = g_list_prepend(op_list, rsc_op);
2226 if (op_list == NULL) {
2232 rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
2234 rsc = process_orphan_resource(rsc_entry, node, data_set);
2239 saved_role = rsc->
role;
2244 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2245 xmlNode *rsc_op = (xmlNode *) gIter->data;
2249 migrate_op = rsc_op;
2252 unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set);
2257 process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set);
2260 g_list_free(sorted_op_list);
2262 process_rsc_state(rsc, node, on_fail, migrate_op, data_set);
2266 pe_rsc_debug(rsc,
"%s: Overwriting calculated next role %s"
2267 " with requested next role %s",
2272 pe_rsc_info(rsc,
"%s: Not overwriting calculated next role %s"
2273 " with requested next role %s",
2278 if (saved_role > rsc->
role) {
2279 rsc->
role = saved_role;
2286 handle_orphaned_container_fillers(xmlNode * lrm_rsc_list,
pe_working_set_t * data_set)
2288 xmlNode *rsc_entry = NULL;
2289 for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL;
2290 rsc_entry = __xml_next_element(rsc_entry)) {
2295 const char *container_id;
2303 if (container_id == NULL || rsc_id == NULL) {
2308 if (container == NULL) {
2319 pe_rsc_trace(rsc,
"Mapped container of orphaned resource %s to %s",
2320 rsc->
id, container_id);
2329 xmlNode *rsc_entry = NULL;
2330 gboolean found_orphaned_container_filler = FALSE;
2336 for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL;
2337 rsc_entry = __xml_next_element(rsc_entry)) {
2340 resource_t *rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set);
2345 found_orphaned_container_filler = TRUE;
2353 if (found_orphaned_container_filler) {
2354 handle_orphaned_container_fillers(lrm_rsc_list, data_set);
2372 set_node_score(gpointer key, gpointer value, gpointer user_data)
2375 int *score = user_data;
/*
 * Size limit, in bytes, used as the snprintf() bound
 * (STATUS_PATH_MAX - offset) when an XPath string is assembled piecewise
 * below.
 * NOTE(review): presumably also the declared size of that xpath buffer --
 * the declaration is not visible here; confirm.
 */
2380 #define STATUS_PATH_MAX 1024
2382 find_lrm_op(
const char *resource,
const char *op,
const char *node,
const char *source,
2387 xmlNode *xml = NULL;
2389 offset += snprintf(xpath + offset,
STATUS_PATH_MAX - offset,
"//node_state[@uname='%s']", node);
2414 if (xml && success_only) {
2428 pe__call_id(xmlNode *op_xml)
2461 return (stop_op && (pe__call_id(stop_op) > pe__call_id(xml_op)));
2486 int from_status = 0;
2489 xmlNode *migrate_from = NULL;
2496 if (stop_happened_after(rsc, node, xml_op, data_set)) {
2508 source, FALSE, data_set);
2512 pe_rsc_trace(rsc,
"%s op on %s exited with status=%d, rc=%d",
2513 ID(migrate_from), target, from_status, from_rc);
2522 pe_rsc_trace(rsc,
"Detected dangling migration op: %s on %s",
ID(xml_op),
2529 pe_rsc_trace(rsc,
"Marking active on %s %p %d", target, target_node,
2536 pe_rsc_trace(rsc,
"Marking active on %s %p %d", target, target_node,
2562 int target_stop_id = 0;
2563 int target_migrate_from_id = 0;
2564 xmlNode *target_stop = NULL;
2565 xmlNode *target_migrate_from = NULL;
2580 target_stop_id = pe__call_id(target_stop);
2584 source, TRUE, data_set);
2585 target_migrate_from_id = pe__call_id(target_migrate_from);
2587 if ((target_stop == NULL) || (target_stop_id < target_migrate_from_id)) {
2595 target_stop_id, target_migrate_from_id);
2600 }
else if (target_migrate_from == NULL) {
2611 xmlNode *source_migrate_from = NULL;
2612 xmlNode *source_start = NULL;
2613 int source_migrate_to_id = pe__call_id(xml_op);
2616 NULL, TRUE, data_set);
2617 if (pe__call_id(source_migrate_from) > source_migrate_to_id) {
2623 if (pe__call_id(source_start) > source_migrate_to_id) {
2636 xmlNode *source_stop = NULL;
2637 xmlNode *source_migrate_to = NULL;
2655 source, target, TRUE, data_set);
2657 if ((source_stop == NULL)
2658 || (pe__call_id(source_stop) < pe__call_id(source_migrate_to))) {
2672 record_failed_op(xmlNode *op,
const pe_node_t *node,
2675 xmlNode *xIter = NULL;
2682 for (xIter = data_set->
failed->children; xIter; xIter = xIter->next) {
2698 static const char *get_op_key(xmlNode *xml_op)
2708 last_change_str(xmlNode *xml_op)
2711 const char *when_s = NULL;
2718 when_s = strchr(when_s,
' ');
2724 return ((when_s && *when_s)? when_s :
"unknown time");
2728 unpack_rsc_op_failure(
resource_t * rsc,
node_t * node,
int rc, xmlNode * xml_op, xmlNode ** last_failure,
2731 guint interval_ms = 0;
2732 bool is_probe =
false;
2735 const char *key = get_op_key(xml_op);
2743 *last_failure = xml_op;
2750 if (exit_reason == NULL) {
2756 crm_trace(
"Unexpected result (%s%s%s) was recorded for "
2757 "%s of %s on %s at %s " CRM_XS " rc=%d id=%s",
2758 services_ocf_exitcode_str(rc),
2759 (*exit_reason?
": " :
""), exit_reason,
2761 last_change_str(xml_op), rc,
ID(xml_op));
2763 crm_warn(
"Unexpected result (%s%s%s) was recorded for "
2764 "%s of %s on %s at %s " CRM_XS " rc=%d id=%s",
2765 services_ocf_exitcode_str(rc),
2766 (*exit_reason?
": " :
""), exit_reason,
2768 last_change_str(xml_op), rc,
ID(xml_op));
2777 crm_notice(
"If it is not possible for %s to run on %s, see "
2778 "the resource-discovery option for location constraints",
2782 record_failed_op(xml_op, node, rsc, data_set);
2785 action =
custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
2799 unpack_migrate_to_failure(rsc, node, xml_op, data_set);
2802 unpack_migrate_from_failure(rsc, node, xml_op, data_set);
2836 pe_rsc_trace(rsc,
"Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
2853 if (pe_rsc_is_clone(parent)
2861 crm_notice(
"%s will not be started under current conditions",
2868 g_hash_table_foreach(fail_rsc->
allowed_nodes, set_node_score, &score);
2894 determine_op_status(
2897 guint interval_ms = 0;
2898 bool is_probe =
false;
2900 const char *key = get_op_key(xml_op);
2908 if (exit_reason == NULL) {
2918 if (target_rc < 0) {
2928 crm_warn(
"Expected result not found for %s on %s (corrupt or obsolete CIB?)",
2931 }
else if (target_rc != rc) {
2933 pe_rsc_debug(rsc,
"%s on %s: expected %d (%s), got %d (%s%s%s)",
2935 target_rc, services_ocf_exitcode_str(target_rc),
2936 rc, services_ocf_exitcode_str(rc),
2937 (*exit_reason?
": " :
""), exit_reason);
2944 pe_rsc_info(rsc,
"Probe found %s active on %s at %s",
2946 last_change_str(xml_op));
2962 if (is_probe && (rc != target_rc)) {
2965 "Probe found %s active and promoted on %s at %s",
2967 last_change_str(xml_op));
2983 if (interval_ms > 0) {
2994 pe_proc_err(
"No further recovery can be attempted for %s "
2995 "because %s on %s failed (%s%s%s) at %s "
2996 CRM_XS " rc=%d id=%s", rsc->
id, task,
2998 (*exit_reason?
": " :
""), exit_reason,
2999 last_change_str(xml_op), rc,
ID(xml_op));
3008 crm_info(
"Treating unknown exit status %d from %s of %s "
3009 "on %s at %s as failure",
3011 last_change_str(xml_op));
3021 should_clear_for_param_change(xmlNode *xml_op,
const char *task,
3025 if (!strcmp(task,
"start") || !strcmp(task,
"monitor")) {
3039 switch (digest_data->
rc) {
3041 crm_trace(
"Resource %s history entry %s on %s"
3042 " has no digest to compare",
3071 should_ignore_failure_timeout(
pe_resource_t *rsc, xmlNode *xml_op,
3072 const char *task, guint interval_ms,
3102 if (is_last_failure) {
3103 crm_info(
"Waiting to clear monitor failure for remote node %s"
3104 " until fencing has occurred", rsc->
id);
3138 bool expired = FALSE;
3140 time_t last_run = 0;
3141 guint interval_ms = 0;
3142 int unexpired_fail_count = 0;
3144 const char *clear_reason = NULL;
3155 time_t last_failure = 0;
3159 && !should_ignore_failure_timeout(rsc, xml_op, task, interval_ms,
3160 is_last_failure, data_set)) {
3170 crm_trace(
"%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
3171 " last-failure@%lld",
3172 ID(xml_op), (
long long) last_run, (expired?
"" :
"not "),
3174 (
long long) last_failure);
3176 if (unexpired_fail_count && (now < last_failure)) {
3186 if (unexpired_fail_count == 0) {
3188 clear_reason =
"it expired";
3204 clear_reason =
"reconnect interval is set";
3208 if (!expired && is_last_failure
3209 && should_clear_for_param_change(xml_op, task, rsc, node, data_set)) {
3210 clear_reason =
"resource parameters have changed";
3213 if (clear_reason != NULL) {
3228 crm_info(
"Clearing %s failure will wait until any scheduled "
3229 "fencing of %s completes", task, rsc->
id);
3230 order_after_remote_fencing(clear_op, rsc, data_set);
3275 update_resource_state(
resource_t * rsc,
node_t * node, xmlNode * xml_op,
const char * task,
int rc,
3278 gboolean clear_past_failure = FALSE;
3284 clear_past_failure = TRUE;
3291 const char *op_key = get_op_key(xml_op);
3292 const char *last_failure_key = get_op_key(last_failure);
3295 clear_past_failure = TRUE;
3305 clear_past_failure = TRUE;
3309 clear_past_failure = TRUE;
3313 clear_past_failure = TRUE;
3321 clear_past_failure = TRUE;
3324 unpack_migrate_to_success(rsc, node, xml_op, data_set);
3332 if (clear_past_failure) {
3338 pe_rsc_trace(rsc,
"%s.%s is not cleared by a completed stop",
3386 remap_monitor_rc(
int rc, xmlNode *xml_op,
const pe_node_t *node,
3389 int remapped_rc = rc;
3404 if (rc != remapped_rc) {
3405 crm_trace(
"Remapping monitor result %d to %d", rc, remapped_rc);
3407 record_failed_op(xml_op, node, rsc, data_set);
3422 guint interval_ms = 0;
3423 const char *task = NULL;
3424 const char *task_key = NULL;
3425 const char *exit_reason = NULL;
3426 bool expired = FALSE;
3430 CRM_CHECK(rsc && node && xml_op,
return);
3433 task_key = get_op_key(xml_op);
3436 if (exit_reason == NULL) {
3459 pe_rsc_trace(rsc,
"Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)",
3463 pe_rsc_trace(rsc,
"Node %s (where %s is running) is unclean."
3464 " Further action depends on the value of the stop's on-fail attribute",
3481 && check_operation_expiry(rsc, node, rc, xml_op, data_set)) {
3486 rc = remap_monitor_rc(rc, xml_op, node, rsc, data_set);
3489 if (expired && (rc != target_rc)) {
3492 if (interval_ms == 0) {
3493 crm_notice(
"Ignoring expired %s failure on %s "
3494 CRM_XS " actual=%d expected=%d magic=%s",
3509 crm_notice(
"Rescheduling %s after failure expired on %s "
3510 CRM_XS " actual=%d expected=%d magic=%s",
3523 status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set);
3524 pe_rsc_trace(rsc,
"Remapped %s status to %d", task_key, status);
3530 pe_err(
"Resource history contains cancellation '%s' "
3531 "(%s of %s on %s at %s)",
3533 last_change_str(xml_op));
3575 last_change_str(xml_op),
ID(xml_op));
3576 update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set);
3580 failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
3582 crm_warn(
"Cannot ignore failed %s of %s on %s: "
3583 "Resource agent doesn't exist "
3584 CRM_XS " status=%d rc=%d id=%s",
3591 unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
3615 failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
3620 crm_warn(
"Pretending failed %s (%s%s%s) of %s on %s at %s "
3621 "succeeded " CRM_XS " rc=%d id=%s",
3622 task, services_ocf_exitcode_str(rc),
3623 (*exit_reason?
": " :
""), exit_reason, rsc->
id,
3627 update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set);
3631 record_failed_op(xml_op, node, rsc, data_set);
3634 *on_fail = failure_strategy;
3638 unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
3642 "Preventing %s from restarting on %s because "
3643 "of hard failure (%s%s%s)" CRM_XS " rc=%d id=%s",
3645 services_ocf_exitcode_str(rc),
3646 (*exit_reason?
": " :
""), exit_reason,
3651 crm_err(
"Preventing %s from restarting anywhere because "
3652 "of fatal failure (%s%s%s) " CRM_XS " rc=%d id=%s",
3653 parent->
id, services_ocf_exitcode_str(rc),
3654 (*exit_reason?
": " :
""), exit_reason,
3663 pe_rsc_trace(rsc,
"Resource %s after %s: role=%s, next=%s",
3669 add_node_attrs(xmlNode *xml_obj,
pe_node_t *node,
bool overwrite,
3672 const char *cluster_name = NULL;
3689 cluster_name = g_hash_table_lookup(data_set->
config_hash,
"cluster-name");
3692 strdup(cluster_name));
3706 }
else if (cluster_name) {
3710 strdup(cluster_name));
3716 extract_operations(
const char *node,
const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
3719 int stop_index = -1;
3720 int start_index = -1;
3722 xmlNode *rsc_op = NULL;
3730 sorted_op_list = NULL;
3732 for (rsc_op = __xml_first_child_element(rsc_entry);
3733 rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
3737 op_list = g_list_prepend(op_list, rsc_op);
3741 if (op_list == NULL) {
3749 if (active_filter == FALSE) {
3750 return sorted_op_list;
3757 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
3758 xmlNode *rsc_op = (xmlNode *) gIter->data;
3762 if (start_index < stop_index) {
3763 crm_trace(
"Skipping %s: not active",
ID(rsc_entry));
3766 }
else if (counter < start_index) {
3770 op_list = g_list_append(op_list, rsc_op);
3773 g_list_free(sorted_op_list);
3784 xmlNode *tmp = NULL;
3787 node_t *this_node = NULL;
3789 xmlNode *node_state = NULL;
3791 for (node_state = __xml_first_child_element(status); node_state != NULL;
3792 node_state = __xml_next_element(node_state)) {
3802 if(this_node == NULL) {
3807 determine_remote_online_status(data_set, this_node);
3818 xmlNode *lrm_rsc = NULL;
3823 for (lrm_rsc = __xml_first_child_element(tmp); lrm_rsc != NULL;
3824 lrm_rsc = __xml_next_element(lrm_rsc)) {
3833 intermediate = extract_operations(
uname, rsc_id, lrm_rsc, active_filter);
3834 output = g_list_concat(output, intermediate);