corosync  2.4.2-dirty
exec/votequorum.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2015 Red Hat, Inc.
3  *
4  * All rights reserved.
5  *
6  * Authors: Christine Caulfield (ccaulfie@redhat.com)
7  * Fabio M. Di Nitto (fdinitto@redhat.com)
8  *
9  * This software licensed under BSD license, the text of which follows:
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are met:
13  *
14  * - Redistributions of source code must retain the above copyright notice,
15  * this list of conditions and the following disclaimer.
16  * - Redistributions in binary form must reproduce the above copyright notice,
17  * this list of conditions and the following disclaimer in the documentation
18  * and/or other materials provided with the distribution.
19  * - Neither the name of the MontaVista Software, Inc. nor the names of its
20  * contributors may be used to endorse or promote products derived from this
21  * software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
33  * THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 #include <config.h>
37 
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <fcntl.h>
41 #include <stdint.h>
42 #include <unistd.h>
43 
44 #include <qb/qbipc_common.h>
45 
46 #include "quorum.h"
47 #include <corosync/corodefs.h>
48 #include <corosync/list.h>
49 #include <corosync/logsys.h>
50 #include <corosync/coroapi.h>
51 #include <corosync/icmap.h>
52 #include <corosync/votequorum.h>
54 
55 #include "service.h"
56 #include "util.h"
57 
58 LOGSYS_DECLARE_SUBSYS ("VOTEQ");
59 
60 /*
61  * interface with corosync
62  */
63 
64 static struct corosync_api_v1 *corosync_api;
65 
66 /*
67  * votequorum global config vars
68  */
69 
70 
71 static char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN];
72 static struct cluster_node *qdevice = NULL;
73 static unsigned int qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
74 static unsigned int qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
75 static uint8_t qdevice_can_operate = 1;
76 static void *qdevice_reg_conn = NULL;
77 static uint8_t qdevice_master_wins = 0;
78 
79 static uint8_t two_node = 0;
80 
81 static uint8_t wait_for_all = 0;
82 static uint8_t wait_for_all_status = 0;
83 
84 static enum {ATB_NONE, ATB_LOWEST, ATB_HIGHEST, ATB_LIST} auto_tie_breaker = ATB_NONE;
85 static int lowest_node_id = -1;
86 static int highest_node_id = -1;
87 
88 #define DEFAULT_LMS_WIN 10000
89 static uint8_t last_man_standing = 0;
90 static uint32_t last_man_standing_window = DEFAULT_LMS_WIN;
91 
92 static uint8_t allow_downscale = 0;
93 static uint32_t ev_barrier = 0;
94 
95 static uint8_t ev_tracking = 0;
96 static uint32_t ev_tracking_barrier = 0;
97 static int ev_tracking_fd = -1;
98 
99 /*
100  * votequorum_exec defines/structs/forward definitions
101  */
104  struct qb_ipc_request_header header __attribute__((aligned(8)));
105  uint32_t nodeid;
106  uint32_t votes;
107  uint32_t expected_votes;
108  uint32_t flags;
109 } __attribute__((packed));
110 
112  struct qb_ipc_request_header header __attribute__((aligned(8)));
113  uint32_t nodeid;
114  uint32_t value;
115  uint8_t param;
116  uint8_t _pad0;
117  uint8_t _pad1;
118  uint8_t _pad2;
119 } __attribute__((packed));
120 
122  struct qb_ipc_request_header header __attribute__((aligned(8)));
123  uint32_t operation;
125 } __attribute__((packed));
126 
128  struct qb_ipc_request_header header __attribute__((aligned(8)));
131 } __attribute__((packed));
132 
133 /*
134  * votequorum_exec onwire version (via totem)
135  */
136 
137 #include "votequorum.h"
138 
139 /*
140  * votequorum_exec onwire messages (via totem)
141  */
142 
143 #define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO 0
144 #define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE 1
145 #define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG 2
146 #define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE 3
147 
148 static void votequorum_exec_send_expectedvotes_notification(void);
149 static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context);
150 static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context);
151 
152 #define VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES 1
153 #define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES 2
154 #define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA 3
155 
156 static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value);
157 
158 /*
159  * used by req_exec_quorum_qdevice_reg
160  */
161 #define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER 0
162 #define VOTEQUORUM_QDEVICE_OPERATION_REGISTER 1
163 
164 /*
165  * votequorum internal node status/view
166  */
167 
168 #define NODE_FLAGS_QUORATE 1
169 #define NODE_FLAGS_LEAVING 2
170 #define NODE_FLAGS_WFASTATUS 4
171 #define NODE_FLAGS_FIRST 8
172 #define NODE_FLAGS_QDEVICE_REGISTERED 16
173 #define NODE_FLAGS_QDEVICE_ALIVE 32
174 #define NODE_FLAGS_QDEVICE_CAST_VOTE 64
175 #define NODE_FLAGS_QDEVICE_MASTER_WINS 128
176 
177 typedef enum {
181 } nodestate_t;
182 
/*
 * votequorum's record for a single cluster node. Regular nodes are
 * chained into cluster_members_list through the 'list' member.
 */
183 struct cluster_node {
184  int node_id; /* node identifier; sort key used by node_add_ordered() */
185  nodestate_t state; /* membership state, e.g. NODESTATE_MEMBER or NODESTATE_DEAD */
186  uint32_t votes; /* votes this node adds to the cluster total */
187  uint32_t expected_votes; /* expected votes recorded for this node */
188  uint32_t flags; /* bitmask of NODE_FLAGS_* values */
189  struct list_head list; /* linkage into cluster_members_list */
190 };
191 
192 /*
193  * votequorum internal quorum status
194  */
195 
196 static uint8_t quorum;
197 static uint8_t cluster_is_quorate;
198 
199 /*
200  * votequorum membership data
201  */
202 
203 static struct cluster_node *us;
204 static struct list_head cluster_members_list;
205 static unsigned int quorum_members[PROCESSOR_COUNT_MAX];
206 static unsigned int previous_quorum_members[PROCESSOR_COUNT_MAX];
207 static unsigned int atb_nodelist[PROCESSOR_COUNT_MAX];
208 static int quorum_members_entries = 0;
209 static int previous_quorum_members_entries = 0;
210 static int atb_nodelist_entries = 0;
211 static struct memb_ring_id quorum_ringid;
212 
213 /*
214  * pre allocate all cluster_nodes + one for qdevice
215  */
216 static struct cluster_node cluster_nodes[PROCESSOR_COUNT_MAX+2];
217 static int cluster_nodes_entries = 0;
218 
219 /*
220  * votequorum tracking
221  */
222 struct quorum_pd {
223  unsigned char track_flags;
226  struct list_head list;
227  void *conn;
228 };
229 
230 static struct list_head trackers_list;
231 
232 /*
233  * votequorum timers
234  */
235 
236 static corosync_timer_handle_t qdevice_timer;
237 static int qdevice_timer_set = 0;
238 static corosync_timer_handle_t last_man_standing_timer;
239 static int last_man_standing_timer_set = 0;
240 static int sync_nodeinfo_sent = 0;
241 static int sync_wait_for_poll_or_timeout = 0;
242 
243 /*
244  * Service Interfaces required by service_message_handler struct
245  */
246 
247 static int sync_in_progress = 0;
248 
249 static void votequorum_sync_init (
250  const unsigned int *trans_list,
251  size_t trans_list_entries,
252  const unsigned int *member_list,
253  size_t member_list_entries,
254  const struct memb_ring_id *ring_id);
255 
256 static int votequorum_sync_process (void);
257 static void votequorum_sync_activate (void);
258 static void votequorum_sync_abort (void);
259 
260 static quorum_set_quorate_fn_t quorum_callback;
261 
262 /*
263  * votequorum_exec handler and definitions
264  */
265 
266 static char *votequorum_exec_init_fn (struct corosync_api_v1 *api);
267 static int votequorum_exec_exit_fn (void);
268 static int votequorum_exec_send_nodeinfo(uint32_t nodeid);
269 
270 static void message_handler_req_exec_votequorum_nodeinfo (
271  const void *message,
272  unsigned int nodeid);
273 static void exec_votequorum_nodeinfo_endian_convert (void *message);
274 
275 static void message_handler_req_exec_votequorum_reconfigure (
276  const void *message,
277  unsigned int nodeid);
278 static void exec_votequorum_reconfigure_endian_convert (void *message);
279 
280 static void message_handler_req_exec_votequorum_qdevice_reg (
281  const void *message,
282  unsigned int nodeid);
283 static void exec_votequorum_qdevice_reg_endian_convert (void *message);
284 
285 static void message_handler_req_exec_votequorum_qdevice_reconfigure (
286  const void *message,
287  unsigned int nodeid);
288 static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message);
289 
/*
 * Handler table for votequorum messages received via totem. The entries
 * are listed in the same order as the MESSAGE_REQ_EXEC_VOTEQUORUM_* ids
 * (0 = NODEINFO, 1 = RECONFIGURE, 2 = QDEVICE_REG, 3 = QDEVICE_RECONFIGURE);
 * presumably the table is indexed by that id, so keep both in sync.
 * Each entry pairs the message handler with its endian-conversion routine.
 */
290 static struct corosync_exec_handler votequorum_exec_engine[] =
291 {
292  { /* 0 */
293  .exec_handler_fn = message_handler_req_exec_votequorum_nodeinfo,
294  .exec_endian_convert_fn = exec_votequorum_nodeinfo_endian_convert
295  },
296  { /* 1 */
297  .exec_handler_fn = message_handler_req_exec_votequorum_reconfigure,
298  .exec_endian_convert_fn = exec_votequorum_reconfigure_endian_convert
299  },
300  { /* 2 */
301  .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reg,
302  .exec_endian_convert_fn = exec_votequorum_qdevice_reg_endian_convert
303  },
304  { /* 3 */
305  .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reconfigure,
306  .exec_endian_convert_fn = exec_votequorum_qdevice_reconfigure_endian_convert
307  },
308 };
309 
310 /*
311  * Library Handler and Functions Definitions
312  */
313 
314 static int quorum_lib_init_fn (void *conn);
315 
316 static int quorum_lib_exit_fn (void *conn);
317 
318 static void qdevice_timer_fn(void *arg);
319 
320 static void message_handler_req_lib_votequorum_getinfo (void *conn,
321  const void *message);
322 
323 static void message_handler_req_lib_votequorum_setexpected (void *conn,
324  const void *message);
325 
326 static void message_handler_req_lib_votequorum_setvotes (void *conn,
327  const void *message);
328 
329 static void message_handler_req_lib_votequorum_trackstart (void *conn,
330  const void *message);
331 
332 static void message_handler_req_lib_votequorum_trackstop (void *conn,
333  const void *message);
334 
335 static void message_handler_req_lib_votequorum_qdevice_register (void *conn,
336  const void *message);
337 
338 static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn,
339  const void *message);
340 
341 static void message_handler_req_lib_votequorum_qdevice_update (void *conn,
342  const void *message);
343 
344 static void message_handler_req_lib_votequorum_qdevice_poll (void *conn,
345  const void *message);
346 
347 static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn,
348  const void *message);
349 
350 static struct corosync_lib_handler quorum_lib_service[] =
351 {
352  { /* 0 */
353  .lib_handler_fn = message_handler_req_lib_votequorum_getinfo,
355  },
356  { /* 1 */
357  .lib_handler_fn = message_handler_req_lib_votequorum_setexpected,
359  },
360  { /* 2 */
361  .lib_handler_fn = message_handler_req_lib_votequorum_setvotes,
363  },
364  { /* 3 */
365  .lib_handler_fn = message_handler_req_lib_votequorum_trackstart,
367  },
368  { /* 4 */
369  .lib_handler_fn = message_handler_req_lib_votequorum_trackstop,
371  },
372  { /* 5 */
373  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_register,
375  },
376  { /* 6 */
377  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_unregister,
379  },
380  { /* 7 */
381  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_update,
383  },
384  { /* 8 */
385  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_poll,
387  },
388  { /* 9 */
389  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_master_wins,
391  }
392 };
393 
/*
 * Service descriptor for votequorum. It ties together the library (IPC)
 * handler table, the exec (totem) handler table, the init/exit hooks and
 * the four sync callbacks declared earlier in this file. The handler
 * counts are derived from the table sizes, so adding a table entry needs
 * no change here.
 */
394 static struct corosync_service_engine votequorum_service_engine = {
395  .name = "corosync vote quorum service v1.0",
396  .id = VOTEQUORUM_SERVICE,
397  .priority = 2,
398  .private_data_size = sizeof (struct quorum_pd), /* per-connection struct quorum_pd */
399  .allow_inquorate = CS_LIB_ALLOW_INQUORATE,
400  .flow_control = COROSYNC_LIB_FLOW_CONTROL_REQUIRED,
401  .lib_init_fn = quorum_lib_init_fn,
402  .lib_exit_fn = quorum_lib_exit_fn,
403  .lib_engine = quorum_lib_service,
404  .lib_engine_count = sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler),
405  .exec_init_fn = votequorum_exec_init_fn,
406  .exec_exit_fn = votequorum_exec_exit_fn,
407  .exec_engine = votequorum_exec_engine,
408  .exec_engine_count = sizeof (votequorum_exec_engine) / sizeof (struct corosync_exec_handler),
409  .sync_init = votequorum_sync_init,
410  .sync_process = votequorum_sync_process,
411  .sync_activate = votequorum_sync_activate,
412  .sync_abort = votequorum_sync_abort
413 };
414 
416 {
417  return (&votequorum_service_engine);
418 }
419 
420 static struct default_service votequorum_service[] = {
421  {
422  .name = "corosync_votequorum",
423  .ver = 0,
425  },
426 };
427 
428 /*
429  * common/utility macros/functions
430  */
431 
/*
 * max(a, b): the larger of the two arguments. This is a function-like
 * macro, so an argument may be evaluated twice; do not pass expressions
 * with side effects.
 */
#define max(a,b) (((a) > (b)) ? (a) : (b))

/*
 * list_iterate(v, head): visit every element of a circular list, starting
 * at head->next and stopping when the cursor v is back at head. Both
 * arguments are fully parenthesized so that arbitrary expressions can be
 * passed safely. head is evaluated on every iteration.
 */
#define list_iterate(v, head) \
	for ((v) = (head)->next; (v) != (head); (v) = (v)->next)
436 
437 static void node_add_ordered(struct cluster_node *newnode)
438 {
439  struct cluster_node *node = NULL;
440  struct list_head *tmp;
441  struct list_head *newlist = &newnode->list;
442 
443  ENTER();
444 
445  list_iterate(tmp, &cluster_members_list) {
446  node = list_entry(tmp, struct cluster_node, list);
447  if (newnode->node_id < node->node_id) {
448  break;
449  }
450  }
451 
452  if (!node) {
453  list_add(&newnode->list, &cluster_members_list);
454  } else {
455  newlist->prev = tmp->prev;
456  newlist->next = tmp;
457  tmp->prev->next = newlist;
458  tmp->prev = newlist;
459  }
460 
461  LEAVE();
462 }
463 
464 static struct cluster_node *allocate_node(unsigned int nodeid)
465 {
466  struct cluster_node *cl = NULL;
467  struct list_head *tmp;
468 
469  ENTER();
470 
471  if (cluster_nodes_entries <= PROCESSOR_COUNT_MAX + 1) {
472  cl = (struct cluster_node *)&cluster_nodes[cluster_nodes_entries];
473  cluster_nodes_entries++;
474  } else {
475  list_iterate(tmp, &cluster_members_list) {
476  cl = list_entry(tmp, struct cluster_node, list);
477  if (cl->state == NODESTATE_DEAD) {
478  break;
479  }
480  }
481  /*
482  * this should never happen
483  */
484  if (!cl) {
485  log_printf(LOGSYS_LEVEL_CRIT, "Unable to find memory for node %u data!!", nodeid);
486  goto out;
487  }
488  list_del(tmp);
489  }
490 
491  memset(cl, 0, sizeof(struct cluster_node));
492  cl->node_id = nodeid;
493  if (nodeid != VOTEQUORUM_QDEVICE_NODEID) {
494  node_add_ordered(cl);
495  }
496 
497 out:
498  LEAVE();
499 
500  return cl;
501 }
502 
503 static struct cluster_node *find_node_by_nodeid(unsigned int nodeid)
504 {
505  struct cluster_node *node;
506  struct list_head *tmp;
507 
508  ENTER();
509 
510  if (nodeid == us->node_id) {
511  LEAVE();
512  return us;
513  }
514 
515  if (nodeid == VOTEQUORUM_QDEVICE_NODEID) {
516  LEAVE();
517  return qdevice;
518  }
519 
520  list_iterate(tmp, &cluster_members_list) {
521  node = list_entry(tmp, struct cluster_node, list);
522  if (node->node_id == nodeid) {
523  LEAVE();
524  return node;
525  }
526  }
527 
528  LEAVE();
529  return NULL;
530 }
531 
532 static void get_lowest_node_id(void)
533 {
534  struct cluster_node *node = NULL;
535  struct list_head *tmp;
536 
537  ENTER();
538 
539  lowest_node_id = us->node_id;
540 
541  list_iterate(tmp, &cluster_members_list) {
542  node = list_entry(tmp, struct cluster_node, list);
543  if ((node->state == NODESTATE_MEMBER) &&
544  (node->node_id < lowest_node_id)) {
545  lowest_node_id = node->node_id;
546  }
547  }
548  log_printf(LOGSYS_LEVEL_DEBUG, "lowest node id: %d us: %d", lowest_node_id, us->node_id);
549  icmap_set_uint32("runtime.votequorum.lowest_node_id", lowest_node_id);
550 
551  LEAVE();
552 }
553 
554 static void get_highest_node_id(void)
555 {
556  struct cluster_node *node = NULL;
557  struct list_head *tmp;
558 
559  ENTER();
560 
561  highest_node_id = us->node_id;
562 
563  list_iterate(tmp, &cluster_members_list) {
564  node = list_entry(tmp, struct cluster_node, list);
565  if ((node->state == NODESTATE_MEMBER) &&
566  (node->node_id > highest_node_id)) {
567  highest_node_id = node->node_id;
568  }
569  }
570  log_printf(LOGSYS_LEVEL_DEBUG, "highest node id: %d us: %d", highest_node_id, us->node_id);
571  icmap_set_uint32("runtime.votequorum.highest_node_id", highest_node_id);
572 
573  LEAVE();
574 }
575 
576 static int check_low_node_id_partition(void)
577 {
578  struct cluster_node *node = NULL;
579  struct list_head *tmp;
580  int found = 0;
581 
582  ENTER();
583 
584  list_iterate(tmp, &cluster_members_list) {
585  node = list_entry(tmp, struct cluster_node, list);
586  if ((node->state == NODESTATE_MEMBER) &&
587  (node->node_id == lowest_node_id)) {
588  found = 1;
589  }
590  }
591 
592  LEAVE();
593  return found;
594 }
595 
596 static int check_high_node_id_partition(void)
597 {
598  struct cluster_node *node = NULL;
599  struct list_head *tmp;
600  int found = 0;
601 
602  ENTER();
603 
604  list_iterate(tmp, &cluster_members_list) {
605  node = list_entry(tmp, struct cluster_node, list);
606  if ((node->state == NODESTATE_MEMBER) &&
607  (node->node_id == highest_node_id)) {
608  found = 1;
609  }
610  }
611 
612  LEAVE();
613  return found;
614 }
615 
/*
 * Linear search for nodeid in the first 'entries' elements of members.
 *
 * Returns 1 when found, 0 otherwise (including when entries <= 0).
 */
static int is_in_nodelist(int nodeid, unsigned int *members, int entries)
{
	int idx = 0;
	int hit = 0;

	ENTER();

	while (idx < entries) {
		if (nodeid == members[idx]) {
			hit = 1;
			break;
		}
		idx++;
	}

	LEAVE();
	return hit;
}
630 
631 /*
632  * The algorithm for a list of tie-breaker nodes is:
633  * travel the list of nodes in the auto_tie_breaker list,
634  * if the node IS in our current partition, check if the
635  * nodes earlier in the atb list are in the 'previous' partition;
636  * If none are found then we are safe to be quorate, if any are
637  * then we cannot be as we don't know if that node is up or down.
638  * If we don't have a node in the current list we are NOT quorate.
639  * Obviously if we find the first node in the atb list in our
640  * partition then we are quorate.
641  *
642  * Special cases lowest nodeid, and highest nodeid are handled separately.
643  */
644 static int check_auto_tie_breaker(void)
645 {
646  int i, j;
647  int res;
648  ENTER();
649 
650  if (auto_tie_breaker == ATB_LOWEST) {
651  res = check_low_node_id_partition();
652  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LOWEST decision: %d", res);
653  LEAVE();
654  return res;
655  }
656  if (auto_tie_breaker == ATB_HIGHEST) {
657  res = check_high_node_id_partition();
658  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_HIGHEST decision: %d", res);
659  LEAVE();
660  return res;
661  }
662 
663  /* Assume ATB_LIST, we should never be called for ATB_NONE */
664  for (i=0; i < atb_nodelist_entries; i++) {
665  if (is_in_nodelist(atb_nodelist[i], quorum_members, quorum_members_entries)) {
666  /*
667  * Node is in our partition, if any of its predecessors are
668  * in the previous quorum partition then it might be in the
669  * 'other half' (as we've got this far without seeing it here)
670  * and so we can't be quorate.
671  */
672  for (j=0; j<i; j++) {
673  if (is_in_nodelist(atb_nodelist[j], previous_quorum_members, previous_quorum_members_entries)) {
674  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found node %d in previous partition but not here, quorum denied", atb_nodelist[j]);
675  LEAVE();
676  return 0;
677  }
678  }
679 
680  /*
681  * None of the other list nodes were in the previous partition, if there
682  * are enough votes, we can be quorate
683  */
684  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found node %d in current partition, we can be quorate", atb_nodelist[i]);
685  LEAVE();
686  return 1;
687  }
688  }
689  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found no list nodes in current partition, we cannot be quorate");
690  LEAVE();
691  return 0;
692 }
693 
694 /*
695  * atb_string can be either:
696  * 'lowest'
697  * 'highest'
698  * a list of nodeids
699  */
700 static void parse_atb_string(char *atb_string)
701 {
702  char *ptr;
703  long num;
704 
705  ENTER();
706  auto_tie_breaker = ATB_NONE;
707 
708  if (!strcmp(atb_string, "lowest"))
709  auto_tie_breaker = ATB_LOWEST;
710 
711  if (!strcmp(atb_string, "highest"))
712  auto_tie_breaker = ATB_HIGHEST;
713 
714  if (atoi(atb_string)) {
715 
716  atb_nodelist_entries = 0;
717  ptr = atb_string;
718  do {
719  num = strtol(ptr, &ptr, 10);
720  if (num) {
721  log_printf(LOGSYS_LEVEL_DEBUG, "ATB nodelist[%d] = %d", atb_nodelist_entries, num);
722  atb_nodelist[atb_nodelist_entries++] = num;
723  }
724  } while (num);
725 
726  if (atb_nodelist_entries) {
727  auto_tie_breaker = ATB_LIST;
728  }
729  }
730  icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
731  log_printf(LOGSYS_LEVEL_DEBUG, "ATB type = %d", auto_tie_breaker);
732 
733  /* Make sure we got something */
734  if (auto_tie_breaker == ATB_NONE) {
735  log_printf(LOGSYS_LEVEL_WARNING, "auto_tie_breaker_nodes is not valid. It must be 'lowest', 'highest' or a space-separated list of node IDs. auto_tie_breaker is disabled");
736  auto_tie_breaker = ATB_NONE;
737  }
738  LEAVE();
739 }
740 
/*
 * Scan cluster_members_list and return 1 if at least one node in
 * NODESTATE_MEMBER state also satisfies the rest of the condition below,
 * 0 otherwise.
 *
 * NOTE(review): the remainder of the condition (listing lines 752-753)
 * is not visible in this copy of the file. Judging by the function name
 * it presumably tests the node's qdevice master-wins state; confirm
 * against the full source.
 */
741 static int check_qdevice_master(void)
742 {
743  struct cluster_node *node = NULL;
744  struct list_head *tmp;
745  int found = 0;
746 
747  ENTER();
748 
749  list_iterate(tmp, &cluster_members_list) {
750  node = list_entry(tmp, struct cluster_node, list);
751  if ((node->state == NODESTATE_MEMBER) &&
754  found = 1;
755  }
756  }
757 
758  LEAVE();
759  return found;
760 }
761 
/*
 * Debug helper: render every NODE_FLAGS_* bit of 'flags' as "Yes"/"No"
 * in a single formatted message.
 *
 * NOTE(review): the line that opens the logging call (listing line 766)
 * is not visible in this copy of the file; the format string and its
 * arguments below belong to that call.
 */
762 static void decode_flags(uint32_t flags)
763 {
764  ENTER();
765 
767  "flags: quorate: %s Leaving: %s WFA Status: %s First: %s Qdevice: %s QdeviceAlive: %s QdeviceCastVote: %s QdeviceMasterWins: %s",
768  (flags & NODE_FLAGS_QUORATE)?"Yes":"No",
769  (flags & NODE_FLAGS_LEAVING)?"Yes":"No",
770  (flags & NODE_FLAGS_WFASTATUS)?"Yes":"No",
771  (flags & NODE_FLAGS_FIRST)?"Yes":"No",
772  (flags & NODE_FLAGS_QDEVICE_REGISTERED)?"Yes":"No",
773  (flags & NODE_FLAGS_QDEVICE_ALIVE)?"Yes":"No",
774  (flags & NODE_FLAGS_QDEVICE_CAST_VOTE)?"Yes":"No",
775  (flags & NODE_FLAGS_QDEVICE_MASTER_WINS)?"Yes":"No");
776 
777  LEAVE();
778 }
779 
780 /*
781  * load/save are copied almost pristine from totemsrp.c
782  */
/*
 * Load ev_tracking_barrier from the file "<run dir>/ev_tracking".
 *
 * If the file exists and a full uint32_t can be read from it, that value
 * becomes ev_tracking_barrier. Otherwise the barrier is reset to 0 and the
 * file is (re)created holding that value.
 *
 * Returns 0 on success (also when the initial write comes up short; only
 * a message is emitted in that case) and -1 when the file cannot be
 * created.
 *
 * The descriptor is closed on every path that opened it, so
 * ev_tracking_fd does not refer to an open file when this returns.
 *
 * NOTE(review): the lines that open the two logging calls (listing lines
 * 808 and 815) are not visible in this copy of the file.
 * NOTE(review): umask(0) changes the process-wide umask and is never
 * restored here; confirm that this is intended.
 */
783 static int load_ev_tracking_barrier(void)
784 {
785  int res = 0;
786  char filename[PATH_MAX];
787 
788  ENTER();
789 
790  snprintf(filename, sizeof(filename) - 1, "%s/ev_tracking", get_run_dir());
791 
 /* first try: read the value back from an existing file */
792  ev_tracking_fd = open(filename, O_RDWR, 0700);
793  if (ev_tracking_fd != -1) {
794  res = read (ev_tracking_fd, &ev_tracking_barrier, sizeof(uint32_t));
795  close(ev_tracking_fd);
796  if (res == sizeof (uint32_t)) {
797  LEAVE();
798  return 0;
799  }
800  }
801 
 /* missing or short file: start again from 0 and (re)create the file */
802  ev_tracking_barrier = 0;
803  umask(0);
804  ev_tracking_fd = open (filename, O_CREAT|O_RDWR, 0700);
805  if (ev_tracking_fd != -1) {
806  res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
807  if ((res == -1) || (res != sizeof (uint32_t))) {
809  "Unable to write to %s", filename);
810  }
811  close(ev_tracking_fd);
812  LEAVE();
813  return 0;
814  }
816  "Unable to create %s file", filename);
817 
818  LEAVE();
819 
820  return -1;
821 }
822 
/*
 * Record the new wait-for-all status in wait_for_all_status, mirror it
 * in our own node flags and publish it under
 * runtime.votequorum.wait_for_all_status.
 *
 * When the status is 0, NODE_FLAGS_WFASTATUS is cleared from us->flags.
 *
 * NOTE(review): the statement of the non-zero branch (listing line 829)
 * is not visible in this copy of the file; presumably it sets
 * NODE_FLAGS_WFASTATUS. Confirm against the full source.
 */
823 static void update_wait_for_all_status(uint8_t wfa_status)
824 {
825  ENTER();
826 
827  wait_for_all_status = wfa_status;
828  if (wait_for_all_status) {
830  } else {
831  us->flags &= ~NODE_FLAGS_WFASTATUS;
832  }
833  icmap_set_uint8("runtime.votequorum.wait_for_all_status",
834  wait_for_all_status);
835 
836  LEAVE();
837 }
838 
839 static void update_two_node(void)
840 {
841  ENTER();
842 
843  icmap_set_uint8("runtime.votequorum.two_node", two_node);
844 
845  LEAVE();
846 }
847 
848 static void update_ev_barrier(uint32_t expected_votes)
849 {
850  ENTER();
851 
852  ev_barrier = expected_votes;
853  icmap_set_uint32("runtime.votequorum.ev_barrier", ev_barrier);
854 
855  LEAVE();
856 }
857 
858 static void update_qdevice_can_operate(uint8_t status)
859 {
860  ENTER();
861 
862  qdevice_can_operate = status;
863  icmap_set_uint8("runtime.votequorum.qdevice_can_operate", qdevice_can_operate);
864 
865  LEAVE();
866 }
867 
868 static void update_qdevice_master_wins(uint8_t allow)
869 {
870  ENTER();
871 
872  qdevice_master_wins = allow;
873  icmap_set_uint8("runtime.votequorum.qdevice_master_wins", qdevice_master_wins);
874 
875  LEAVE();
876 }
877 
/*
 * Set ev_tracking_barrier to ev_t_barrier, publish it under
 * runtime.votequorum.ev_tracking_barrier and persist it at offset 0 of
 * the file referred to by ev_tracking_fd.
 *
 * The in-memory value and the icmap key are updated even when the on-disk
 * update fails. A failed seek returns early; a short write is reported
 * but still followed by the sync call.
 *
 * NOTE(review): the lines that open the two logging calls (listing lines
 * 888 and 896) are not visible in this copy of the file.
 */
878 static void update_ev_tracking_barrier(uint32_t ev_t_barrier)
879 {
880  int res;
881 
882  ENTER();
883 
884  ev_tracking_barrier = ev_t_barrier;
885  icmap_set_uint32("runtime.votequorum.ev_tracking_barrier", ev_tracking_barrier);
886 
 /* rewind so that the value always overwrites the start of the file */
887  if (lseek (ev_tracking_fd, 0, SEEK_SET) != 0) {
889  "Unable to update ev_tracking_barrier on disk data!!!");
890  LEAVE();
891  return;
892  }
893 
894  res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
895  if (res != sizeof (uint32_t)) {
897  "Unable to update ev_tracking_barrier on disk data!!!");
898  }
 /* flush to stable storage; fdatasync() is used when available */
899 #ifdef HAVE_FDATASYNC
900  fdatasync(ev_tracking_fd);
901 #else
902  fsync(ev_tracking_fd);
903 #endif
904 
905  LEAVE();
906 }
907 
908 /*
909  * quorum calculation core bits
910  */
911 
/*
 * Compute the quorum value from the current membership.
 *
 * allow_decrease  - when 0, the result is never lower than the current
 *                   global 'quorum'.
 * max_expected    - when non-zero, it is written into expected_votes of
 *                   every member node and used as the expected-votes
 *                   figure; when 0, the highest expected_votes among the
 *                   members is used instead.
 * ret_total_votes - optional out parameter; receives the sum of votes of
 *                   all member nodes plus the votes added in the qdevice
 *                   block below.
 *
 * Returns the new quorum value. Side effect: may overwrite
 * node->expected_votes of member nodes (see max_expected).
 *
 * NOTE(review): the condition guarding the qdevice block (listing line
 * 944) is not visible in this copy of the file.
 */
912 static int calculate_quorum(int allow_decrease, unsigned int max_expected, unsigned int *ret_total_votes)
913 {
914  struct list_head *nodelist;
915  struct cluster_node *node;
916  unsigned int total_votes = 0;
917  unsigned int highest_expected = 0;
918  unsigned int newquorum, q1, q2;
919  unsigned int total_nodes = 0;
920 
921  ENTER();
922 
 /* with downscaling enabled, expected votes never drop below ev_barrier */
923  if ((allow_downscale) && (allow_decrease) && (max_expected)) {
924  max_expected = max(ev_barrier, max_expected);
925  }
926 
927  list_iterate(nodelist, &cluster_members_list) {
928  node = list_entry(nodelist, struct cluster_node, list);
929 
930  log_printf(LOGSYS_LEVEL_DEBUG, "node %u state=%d, votes=%u, expected=%u",
931  node->node_id, node->state, node->votes, node->expected_votes);
932 
 /* only nodes that are currently members count towards the totals */
933  if (node->state == NODESTATE_MEMBER) {
934  if (max_expected) {
935  node->expected_votes = max_expected;
936  } else {
937  highest_expected = max(highest_expected, node->expected_votes);
938  }
939  total_votes += node->votes;
940  total_nodes++;
941  }
942  }
943 
945  log_printf(LOGSYS_LEVEL_DEBUG, "node 0 state=1, votes=%u", qdevice->votes);
946  total_votes += qdevice->votes;
947  total_nodes++;
948  }
949 
950  if (max_expected > 0) {
951  highest_expected = max_expected;
952  }
953 
954  /*
955  * This quorum calculation is taken from the OpenVMS Cluster Systems
956  * manual, but, then, you guessed that didn't you
957  */
958  q1 = (highest_expected + 2) / 2;
959  q2 = (total_votes + 2) / 2;
960  newquorum = max(q1, q2);
961 
962  /*
963  * Normally quorum never decreases but the system administrator can
964  * force it down by setting expected votes to a maximum value
965  */
966  if (!allow_decrease) {
967  newquorum = max(quorum, newquorum);
968  }
969 
970  /*
971  * The special two_node mode allows each of the two nodes to retain
972  * quorum if the other fails. Only one of the two should live past
973  * fencing (as both nodes try to fence each other in split-brain.)
974  * Also: if there are more than two nodes, force us inquorate to avoid
975  * any damage or confusion.
976  */
 /*
  * NOTE(review): the code below only lowers quorum to 1 when at most two
  * voters were counted; with more than two the value computed above is
  * kept unchanged, nothing is forced here.
  */
977  if (two_node && total_nodes <= 2) {
978  newquorum = 1;
979  }
980 
981  if (ret_total_votes) {
982  *ret_total_votes = total_votes;
983  }
984 
985  LEAVE();
986  return newquorum;
987 }
988 
/*
 * Decide whether this partition is quorate for the given total_votes and
 * update cluster_is_quorate, NODE_FLAGS_QUORATE in us->flags and (when
 * wait_for_all is enabled) the wait-for-all status.
 *
 * Order of evaluation:
 *  1. While wait-for-all is pending, stay inquorate until total_votes
 *     equals us->expected_votes.
 *  2. Quorate when total_votes >= quorum.
 *  3. The auto tie breaker may grant quorum in an even split.
 *  4. qdevice master-wins may grant quorum if still inquorate.
 *
 * On a state change, and only when no sync is in progress, the quorum
 * callback and the quorum notification are fired.
 *
 * NOTE(review): the line that opens the logging call in the wait-for-all
 * branch (listing line 1002) is not visible in this copy of the file.
 * NOTE(review): the early return in that branch skips LEAVE() and does
 * not update NODE_FLAGS_QUORATE or notify anyone.
 */
989 static void are_we_quorate(unsigned int total_votes)
990 {
991  int quorate;
992  int quorum_change = 0;
993 
994  ENTER();
995 
996  /*
997  * wait for all nodes to show up before granting quorum
998  */
999 
1000  if ((wait_for_all) && (wait_for_all_status)) {
1001  if (total_votes != us->expected_votes) {
1003  "Waiting for all cluster members. "
1004  "Current votes: %d expected_votes: %d",
1005  total_votes, us->expected_votes);
1006  cluster_is_quorate = 0;
1007  return;
1008  }
1009  update_wait_for_all_status(0);
1010  }
1011 
1012  if (quorum > total_votes) {
1013  quorate = 0;
1014  } else {
1015  quorate = 1;
 /* the lowest/highest ids are refreshed only while quorate */
1016  get_lowest_node_id();
1017  get_highest_node_id();
1018  }
1019 
1020  if ((auto_tie_breaker != ATB_NONE) &&
1021  /* Must be a half (or half-1) split */
1022  (total_votes == (us->expected_votes / 2)) &&
1023  /* If the 'other' partition in a split might have quorum then we can't run ATB */
1024  (previous_quorum_members_entries - quorum_members_entries < quorum) &&
1025  (check_auto_tie_breaker() == 1)) {
1026  quorate = 1;
1027  }
1028 
1029  if ((qdevice_master_wins) &&
1030  (!quorate) &&
1031  (check_qdevice_master() == 1)) {
1032  log_printf(LOGSYS_LEVEL_DEBUG, "node is quorate as part of master_wins partition");
1033  quorate = 1;
1034  }
1035 
 /* detect a transition in either direction */
1036  if (cluster_is_quorate && !quorate) {
1037  quorum_change = 1;
1038  log_printf(LOGSYS_LEVEL_DEBUG, "quorum lost, blocking activity");
1039  }
1040  if (!cluster_is_quorate && quorate) {
1041  quorum_change = 1;
1042  log_printf(LOGSYS_LEVEL_DEBUG, "quorum regained, resuming activity");
1043  }
1044 
1045  cluster_is_quorate = quorate;
1046  if (cluster_is_quorate) {
1047  us->flags |= NODE_FLAGS_QUORATE;
1048  } else {
1049  us->flags &= ~NODE_FLAGS_QUORATE;
1050  }
1051 
 /* with wait_for_all, losing quorum re-arms the wait-for-all status */
1052  if (wait_for_all) {
1053  if (quorate) {
1054  update_wait_for_all_status(0);
1055  } else {
1056  update_wait_for_all_status(1);
1057  }
1058  }
1059 
1060  if ((quorum_change) &&
1061  (sync_in_progress == 0)) {
1062  quorum_callback(quorum_members, quorum_members_entries,
1063  cluster_is_quorate, &quorum_ringid);
1064  votequorum_exec_send_quorum_notification(NULL, 0L);
1065  }
1066 
1067  LEAVE();
1068 }
1069 
1070 static void get_total_votes(unsigned int *totalvotes, unsigned int *current_members)
1071 {
1072  unsigned int total_votes = 0;
1073  unsigned int cluster_members = 0;
1074  struct list_head *nodelist;
1075  struct cluster_node *node;
1076 
1077  ENTER();
1078 
1079  list_iterate(nodelist, &cluster_members_list) {
1080  node = list_entry(nodelist, struct cluster_node, list);
1081  if (node->state == NODESTATE_MEMBER) {
1082  cluster_members++;
1083  total_votes += node->votes;
1084  }
1085  }
1086 
1087  if (qdevice->votes) {
1088  total_votes += qdevice->votes;
1089  cluster_members++;
1090  }
1091 
1092  *totalvotes = total_votes;
1093  *current_members = cluster_members;
1094 
1095  LEAVE();
1096 }
1097 
1098 /*
1099  * Recalculate cluster quorum, set quorate and notify changes
1100  */
1101 static void recalculate_quorum(int allow_decrease, int by_current_nodes)
1102 {
1103  unsigned int total_votes = 0;
1104  unsigned int cluster_members = 0;
1105 
1106  ENTER();
1107 
1108  get_total_votes(&total_votes, &cluster_members);
1109 
1110  if (!by_current_nodes) {
1111  cluster_members = 0;
1112  }
1113 
1114  /*
1115  * Keep expected_votes at the highest number of votes in the cluster
1116  */
1117  log_printf(LOGSYS_LEVEL_DEBUG, "total_votes=%d, expected_votes=%d", total_votes, us->expected_votes);
1118  if (total_votes > us->expected_votes) {
1119  us->expected_votes = total_votes;
1120  votequorum_exec_send_expectedvotes_notification();
1121  }
1122 
1123  if ((ev_tracking) &&
1124  (us->expected_votes > ev_tracking_barrier)) {
1125  update_ev_tracking_barrier(us->expected_votes);
1126  }
1127 
1128  quorum = calculate_quorum(allow_decrease, cluster_members, &total_votes);
1129  are_we_quorate(total_votes);
1130 
1131  LEAVE();
1132 }
1133 
1134 /*
1135  * configuration bits and pieces
1136  */
1137 
/*
 * Read the per-node quorum settings from the icmap "nodelist.node." keys.
 *
 * Every node position that carries a "ring0_addr" key is counted as one
 * node; its "quorum_votes" value (1 when the key cannot be read) is added
 * to the running total.
 *
 * votes          - receives the votes of the node whose position equals
 *                  nodelist.local_node_pos; not written when that
 *                  position is never seen during the walk
 * nodes          - receives the number of counted nodes
 * expected_votes - receives the sum of the counted nodes' votes
 *
 * Returns 0 when nodelist.local_node_pos cannot be read (no output is
 * written in that case), 1 otherwise.
 */
1138 static int votequorum_read_nodelist_configuration(uint32_t *votes,
1139  uint32_t *nodes,
1140  uint32_t *expected_votes)
1141 {
1142  icmap_iter_t iter;
1143  const char *iter_key;
1144  char tmp_key[ICMAP_KEYNAME_MAXLEN];
1145  uint32_t our_pos, node_pos;
1146  uint32_t nodecount = 0;
1147  uint32_t nodelist_expected_votes = 0;
1148  uint32_t node_votes = 0;
1149  int res = 0;
1150 
1151  ENTER();
1152 
1153  if (icmap_get_uint32("nodelist.local_node_pos", &our_pos) != CS_OK) {
 /*
  * NOTE(review): listing line 1154, which opens the call this string
  * belongs to, is absent from this listing. This early return also
  * does not call LEAVE().
  */
1155  "No nodelist defined or our node is not in the nodelist");
1156  return 0;
1157  }
1158 
1159  iter = icmap_iter_init("nodelist.node.");
1160 
1161  while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
1162 
 /*
  * Split the key into the node position and the trailing key name.
  * NOTE(review): %s carries no field width, so this relies on the
  * trailing part fitting into tmp_key - confirm.
  */
1163  res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key);
1164  if (res != 2) {
1165  continue;
1166  }
1167 
 /* only ring0_addr keys are processed; presumably so each node is counted once */
1168  if (strcmp(tmp_key, "ring0_addr") != 0) {
1169  continue;
1170  }
1171 
1172  nodecount++;
1173 
 /* tmp_key is reused to build the quorum_votes key of this node */
1174  snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.quorum_votes", node_pos);
1175  if (icmap_get_uint32(tmp_key, &node_votes) != CS_OK) {
1176  node_votes = 1;
1177  }
1178 
1179  nodelist_expected_votes = nodelist_expected_votes + node_votes;
1180 
1181  if (node_pos == our_pos) {
1182  *votes = node_votes;
1183  }
1184  }
1185 
1186  *expected_votes = nodelist_expected_votes;
1187  *nodes = nodecount;
1188 
1189  icmap_iter_finalize(iter);
1190 
1191  LEAVE();
1192 
1193  return 1;
1194 }
1195 
1196 static int votequorum_qdevice_is_configured(uint32_t *qdevice_votes)
1197 {
1198  char *qdevice_model = NULL;
1199  int ret = 0;
1200 
1201  ENTER();
1202 
1203  if (icmap_get_string("quorum.device.model", &qdevice_model) == CS_OK) {
1204  if (strlen(qdevice_model)) {
1205  if (icmap_get_uint32("quorum.device.votes", qdevice_votes) != CS_OK) {
1206  *qdevice_votes = -1;
1207  }
1208  if (icmap_get_uint32("quorum.device.timeout", &qdevice_timeout) != CS_OK) {
1209  qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
1210  }
1211  if (icmap_get_uint32("quorum.device.sync_timeout", &qdevice_sync_timeout) != CS_OK) {
1212  qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
1213  }
1214  update_qdevice_can_operate(1);
1215  ret = 1;
1216  }
1217 
1218  free(qdevice_model);
1219  }
1220 
1221  LEAVE();
1222 
1223  return ret;
1224 }
1225 
/*
 * Values for the "runtime" argument of votequorum_readconfig().
 */
1226 #define VOTEQUORUM_READCONFIG_STARTUP 0
1227 #define VOTEQUORUM_READCONFIG_RUNTIME 1
1228 
/*
 * Read the quorum related configuration from icmap, validate it and
 * apply it to the local node (us), the quorum device (qdevice) and the
 * feature globals (two_node, wait_for_all, auto_tie_breaker, ...).
 *
 * runtime - zero at startup, non-zero when a changed configuration is
 *           re-read. The "special features" block is only evaluated when
 *           runtime is zero. Most incompatibilities are returned as an
 *           error at startup, but are only logged (usually together with
 *           update_qdevice_can_operate(0)) at runtime.
 *
 * Returns NULL when no error was recorded, otherwise a pointer to a
 * string literal describing the configuration error (never to be freed).
 */
1229 static char *votequorum_readconfig(int runtime)
1230 {
1231  uint32_t node_votes = 0, qdevice_votes = 0;
1232  uint32_t node_expected_votes = 0, expected_votes = 0;
1233  uint32_t node_count = 0;
1234  uint8_t atb = 0;
1235  int have_nodelist, have_qdevice;
1236  char *atb_string = NULL;
1237  char *error = NULL;
1238 
1239  ENTER();
1240 
1241  log_printf(LOGSYS_LEVEL_DEBUG, "Reading configuration (runtime: %d)", runtime);
1242 
1243  /*
1244  * Set the few things we re-read at runtime back to their defaults
1245  */
1246  if (runtime) {
1247  two_node = 0;
1248  expected_votes = 0;
1249  }
1250 
1251  /*
1252  * gather basic data here
1253  */
 /* return values of the plain icmap_get_* calls are ignored: the variables keep their defaults when a key cannot be read */
1254  icmap_get_uint32("quorum.expected_votes", &expected_votes);
1255  have_nodelist = votequorum_read_nodelist_configuration(&node_votes, &node_count, &node_expected_votes);
1256  have_qdevice = votequorum_qdevice_is_configured(&qdevice_votes);
1257  icmap_get_uint8("quorum.two_node", &two_node);
1258 
1259  /*
1260  * do config verification and enablement
1261  */
1262 
1263  if ((!have_nodelist) && (!expected_votes)) {
1264  if (!runtime) {
1265  error = (char *)"configuration error: nodelist or quorum.expected_votes must be configured!";
1266  } else {
1267  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: nodelist or quorum.expected_votes must be configured!");
1268  log_printf(LOGSYS_LEVEL_CRIT, "will continue with current runtime data");
1269  }
1270  goto out;
1271  }
1272 
1273  /*
1274  * two_node and qdevice are not compatible in the same config.
1275  * try to make an educated guess of what to do
1276  */
1277 
1278  if ((two_node) && (have_qdevice)) {
1279  if (!runtime) {
1280  error = (char *)"configuration error: two_node and quorum device cannot be configured at the same time!";
1281  goto out;
1282  } else {
1283  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: two_node and quorum device cannot be configured at the same time!");
 /*
  * NOTE(review): listing line 1284 is absent here; the "} else {"
  * on line 1287 pairs with a condition that is not visible in this
  * listing.
  */
1285  log_printf(LOGSYS_LEVEL_CRIT, "quorum device is registered, disabling two_node");
1286  two_node = 0;
1287  } else {
1288  log_printf(LOGSYS_LEVEL_CRIT, "quorum device is not registered, allowing two_node");
1289  update_qdevice_can_operate(0);
1290  }
1291  }
1292  }
1293 
1294  /*
1295  * Enable special features
1296  */
1297  if (!runtime) {
 /* two_node turns wait_for_all on by default; quorum.wait_for_all read below can still override it */
1298  if (two_node) {
1299  wait_for_all = 1;
1300  }
1301 
1302  icmap_get_uint8("quorum.allow_downscale", &allow_downscale);
1303  icmap_get_uint8("quorum.wait_for_all", &wait_for_all);
1304  icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
1305  icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
1306  icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking);
1307  icmap_get_uint8("quorum.auto_tie_breaker", &atb);
1308  icmap_get_string("quorum.auto_tie_breaker_node", &atb_string);
1309 
1310  /* auto_tie_breaker defaults to LOWEST */
1311  if (atb) {
1312  auto_tie_breaker = ATB_LOWEST;
1313  icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
1314  }
1315  else {
1316  auto_tie_breaker = ATB_NONE;
1317  if (atb_string) {
 /* NOTE(review): listing line 1318, which opens the call this string belongs to, is absent from this listing */
1319  "auto_tie_breaker_node: is meaningless if auto_tie_breaker is set to 0");
1320  }
1321  }
1322 
1323  if (atb && atb_string) {
1324  parse_atb_string(atb_string);
1325  }
 /* atb_string is NULL when the key was not read; free(NULL) is a no-op */
1326  free(atb_string);
1327 
1328  /* allow_downscale requires ev_tracking */
1329  if (allow_downscale) {
1330  ev_tracking = 1;
1331  }
1332 
1333  if (ev_tracking) {
1334  if (load_ev_tracking_barrier() < 0) {
 /* this return bypasses the "out" label, hence the explicit LEAVE() */
1335  LEAVE();
1336  return ((char *)"Unable to load ev_tracking file!");
1337  }
1338  update_ev_tracking_barrier(ev_tracking_barrier);
1339  }
1340 
1341  }
1342 
1343  /* two_node and auto_tie_breaker are not compatible as two_node uses
1344  * a fence race to decide quorum whereas ATB decides based on node id
1345  */
1346  if (two_node && auto_tie_breaker != ATB_NONE) {
1347  log_printf(LOGSYS_LEVEL_CRIT, "two_node and auto_tie_breaker are both specified but are not compatible.");
1348  log_printf(LOGSYS_LEVEL_CRIT, "two_node has been disabled, please fix your corosync.conf");
1349  two_node = 0;
1350  }
1351 
1352  /* If ATB is set and the cluster has an odd number of nodes then wait_for_all needs
1353  * to be set so that an isolated half+1 without the tie breaker node
1354  * does not have quorum on reboot.
1355  */
 /* the odd/even test below is made on node_expected_votes (the nodelist vote total), not on node_count */
1356  if ((auto_tie_breaker != ATB_NONE) && (node_expected_votes % 2) &&
1357  (!wait_for_all)) {
1358  if (last_man_standing) {
1359  /* if LMS is set too, it's a fatal configuration error. We can't dictate to the user what
1360  * they might want so we'll just quit.
1361  */
1362  log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set, the cluster has an odd number of nodes\n");
1363  log_printf(LOGSYS_LEVEL_CRIT, "and last_man_standing is also set. With this situation a better\n");
1364  log_printf(LOGSYS_LEVEL_CRIT, "solution would be to disable LMS, leave ATB enabled, and also\n");
1365  log_printf(LOGSYS_LEVEL_CRIT, "enable wait_for_all (mandatory for ATB in odd-numbered clusters).\n");
1366  log_printf(LOGSYS_LEVEL_CRIT, "Due to this ambiguity, corosync will fail to start. Please fix your corosync.conf\n");
 /* unlike the checks further down, this error is recorded without testing "runtime" */
1367  error = (char *)"configuration error: auto_tie_breaker & last_man_standing not available in odd sized cluster";
1368  goto out;
1369  }
1370  else {
1371  log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set and the cluster has an odd number of nodes.\n");
1372  log_printf(LOGSYS_LEVEL_CRIT, "wait_for_all needs to be set for this configuration but it is missing\n");
1373  log_printf(LOGSYS_LEVEL_CRIT, "Therefore auto_tie_breaker has been disabled. Please fix your corosync.conf\n");
1374  auto_tie_breaker = ATB_NONE;
1375  icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
1376  }
1377  }
1378 
1379  /*
1380  * quorum device is not compatible with last_man_standing and auto_tie_breaker
1381  * neither lms or atb can be set at runtime, so there is no need to check for
1382  * runtime incompatibilities, but qdevice can be configured _after_ LMS and ATB have
1383  * been enabled at startup.
1384  */
1385 
1386  if ((have_qdevice) && (last_man_standing)) {
1387  if (!runtime) {
1388  error = (char *)"configuration error: quorum.device is not compatible with last_man_standing";
1389  goto out;
1390  } else {
1391  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with last_man_standing");
1392  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1393  update_qdevice_can_operate(0);
1394  }
1395  }
1396 
1397  if ((have_qdevice) && (auto_tie_breaker != ATB_NONE)) {
1398  if (!runtime) {
1399  error = (char *)"configuration error: quorum.device is not compatible with auto_tie_breaker";
1400  goto out;
1401  } else {
1402  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with auto_tie_breaker");
1403  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1404  update_qdevice_can_operate(0);
1405  }
1406  }
1407 
1408  if ((have_qdevice) && (allow_downscale)) {
1409  if (!runtime) {
1410  error = (char *)"configuration error: quorum.device is not compatible with allow_downscale";
1411  goto out;
1412  } else {
1413  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with allow_downscale");
1414  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1415  update_qdevice_can_operate(0);
1416  }
1417  }
1418 
1419  /*
1420  * if user specifies quorum.expected_votes + quorum.device but NOT the device.votes
1421  * we don't know what the quorum device should vote.
1422  */
1423 
 /* qdevice_votes is a uint32_t; -1 is the marker stored by votequorum_qdevice_is_configured() when quorum.device.votes cannot be read */
1424  if ((expected_votes) && (have_qdevice) && (qdevice_votes == -1)) {
1425  if (!runtime) {
1426  error = (char *)"configuration error: quorum.device.votes must be specified when quorum.expected_votes is set";
1427  goto out;
1428  } else {
1429  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when quorum.expected_votes is set");
1430  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1431  update_qdevice_can_operate(0);
1432  }
1433  }
1434 
1435  /*
1436  * if user specifies a node list with uneven votes and no device.votes
1437  * we cannot autocalculate the votes
1438  */
1439 
 /* node_count != node_expected_votes is the test used for "not every node has exactly one vote" */
1440  if ((have_qdevice) &&
1441  (qdevice_votes == -1) &&
1442  (have_nodelist) &&
1443  (node_count != node_expected_votes)) {
1444  if (!runtime) {
1445  error = (char *)"configuration error: quorum.device.votes must be specified when not all nodes votes 1";
1446  goto out;
1447  } else {
1448  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when not all nodes votes 1");
1449  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1450  update_qdevice_can_operate(0);
1451  }
1452  }
1453 
1454  /*
1455  * validate quorum device votes vs expected_votes
1456  */
1457 
 /* NOTE(review): qdevice_votes is unsigned, so the -1 marker also satisfies "> 0" here - confirm this is intended */
1458  if ((qdevice_votes > 0) && (expected_votes)) {
1459  int delta = expected_votes - qdevice_votes;
1460  if (delta < 2) {
1461  if (!runtime) {
1462  error = (char *)"configuration error: quorum.device.votes is too high or expected_votes is too low";
1463  goto out;
1464  } else {
1465  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes is too high or expected_votes is too low");
1466  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1467  update_qdevice_can_operate(0);
1468  }
1469  }
1470  }
1471 
1472  /*
1473  * automatically calculate device votes and adjust expected_votes from nodelist
1474  */
1475 
1476  if ((have_qdevice) &&
1477  (qdevice_votes == -1) &&
1478  (!expected_votes) &&
1479  (have_nodelist) &&
1480  (node_count == node_expected_votes)) {
1481  qdevice_votes = node_expected_votes - 1;
1482  node_expected_votes = node_expected_votes + qdevice_votes;
1483  }
1484 
1485  /*
1486  * set this node votes and expected_votes
1487  */
1488  log_printf(LOGSYS_LEVEL_DEBUG, "ev_tracking=%d, ev_tracking_barrier = %d: expected_votes = %d\n", ev_tracking, ev_tracking_barrier, expected_votes);
1489 
 /* with tracking enabled the stored barrier replaces the configured expected_votes */
1490  if (ev_tracking) {
1491  expected_votes = ev_tracking_barrier;
1492  }
1493 
1494  if (have_nodelist) {
1495  us->votes = node_votes;
1496  us->expected_votes = node_expected_votes;
1497  } else {
1498  us->votes = 1;
1499  icmap_get_uint32("quorum.votes", &us->votes);
1500  }
1501 
1502  if (expected_votes) {
 /* NOTE(review): listing line 1503 (the body of this if) is absent from this listing */
1504  }
1505 
1506  /*
1507  * set qdevice votes
1508  */
1509 
1510  if (!have_qdevice) {
1511  qdevice->votes = 0;
1512  }
1513 
1514  if (qdevice_votes != -1) {
1515  qdevice->votes = qdevice_votes;
1516  }
1517 
1518  update_ev_barrier(us->expected_votes);
1519  update_two_node();
1520  if (wait_for_all) {
1521  update_wait_for_all_status(1);
1522  }
1523 
1524 out:
1525  LEAVE();
1526  return error;
1527 }
1528 
1529 static void votequorum_refresh_config(
1530  int32_t event,
1531  const char *key_name,
1532  struct icmap_notify_value new_val,
1533  struct icmap_notify_value old_val,
1534  void *user_data)
1535 {
1536  int old_votes, old_expected_votes;
1537  uint8_t reloading;
1538  uint8_t cancel_wfa;
1539 
1540  ENTER();
1541 
1542  /*
1543  * If a full reload is in progress then don't do anything until it's done and
1544  * can reconfigure it all atomically
1545  */
1546  if (icmap_get_uint8("config.totemconfig_reload_in_progress", &reloading) == CS_OK && reloading) {
1547  return ;
1548  }
1549 
1550  icmap_get_uint8("quorum.cancel_wait_for_all", &cancel_wfa);
1551  if (strcmp(key_name, "quorum.cancel_wait_for_all") == 0 &&
1552  cancel_wfa >= 1) {
1553  icmap_set_uint8("quorum.cancel_wait_for_all", 0);
1554  votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA,
1555  us->node_id, 0);
1556  return;
1557  }
1558 
1559  old_votes = us->votes;
1560  old_expected_votes = us->expected_votes;
1561 
1562  /*
1563  * Reload the configuration
1564  */
1565  votequorum_readconfig(VOTEQUORUM_READCONFIG_RUNTIME);
1566 
1567  /*
1568  * activate new config
1569  */
1570  votequorum_exec_send_nodeinfo(us->node_id);
1571  votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
1572  if (us->votes != old_votes) {
1573  votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES,
1574  us->node_id, us->votes);
1575  }
1576  if (us->expected_votes != old_expected_votes) {
1577  votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES,
1578  us->node_id, us->expected_votes);
1579  }
1580 
1581  LEAVE();
1582 }
1583 
/*
 * Register votequorum_refresh_config() as icmap tracking callback for
 * the "nodelist." and "quorum." keys and for
 * "config.totemconfig_reload_in_progress".
 *
 * The three track handles are kept in locals only and the results of
 * icmap_track_add() are not checked.
 */
1584 static void votequorum_exec_add_config_notification(void)
1585 {
1586  icmap_track_t icmap_track_nodelist = NULL;
1587  icmap_track_t icmap_track_quorum = NULL;
1588  icmap_track_t icmap_track_reload = NULL;
1589 
1590  ENTER();
1591 
 /*
  * NOTE(review): in each of the three calls below one argument line
  * (listing lines 1593, 1599 and 1605) is absent from this listing.
  */
1592  icmap_track_add("nodelist.",
1594  votequorum_refresh_config,
1595  NULL,
1596  &icmap_track_nodelist);
1597 
1598  icmap_track_add("quorum.",
1600  votequorum_refresh_config,
1601  NULL,
1602  &icmap_track_quorum);
1603 
1604  icmap_track_add("config.totemconfig_reload_in_progress",
1606  votequorum_refresh_config,
1607  NULL,
1608  &icmap_track_reload);
1609 
1610  LEAVE();
1611 }
1612 
1613 /*
1614  * votequorum_exec core
1615  */
1616 
1617 static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value)
1618 {
1619  struct req_exec_quorum_reconfigure req_exec_quorum_reconfigure;
1620  struct iovec iov[1];
1621  int ret;
1622 
1623  ENTER();
1624 
1625  req_exec_quorum_reconfigure.nodeid = nodeid;
1626  req_exec_quorum_reconfigure.value = value;
1627  req_exec_quorum_reconfigure.param = param;
1628  req_exec_quorum_reconfigure._pad0 = 0;
1629  req_exec_quorum_reconfigure._pad1 = 0;
1630  req_exec_quorum_reconfigure._pad2 = 0;
1631 
1632  req_exec_quorum_reconfigure.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE);
1633  req_exec_quorum_reconfigure.header.size = sizeof(req_exec_quorum_reconfigure);
1634 
1635  iov[0].iov_base = (void *)&req_exec_quorum_reconfigure;
1636  iov[0].iov_len = sizeof(req_exec_quorum_reconfigure);
1637 
1638  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1639 
1640  LEAVE();
1641  return ret;
1642 }
1643 
1644 static int votequorum_exec_send_nodeinfo(uint32_t nodeid)
1645 {
1646  struct req_exec_quorum_nodeinfo req_exec_quorum_nodeinfo;
1647  struct iovec iov[1];
1648  struct cluster_node *node;
1649  int ret;
1650 
1651  ENTER();
1652 
1653  node = find_node_by_nodeid(nodeid);
1654  if (!node) {
1655  return -1;
1656  }
1657 
1658  req_exec_quorum_nodeinfo.nodeid = nodeid;
1659  req_exec_quorum_nodeinfo.votes = node->votes;
1660  req_exec_quorum_nodeinfo.expected_votes = node->expected_votes;
1661  req_exec_quorum_nodeinfo.flags = node->flags;
1662  if (nodeid != VOTEQUORUM_QDEVICE_NODEID) {
1663  decode_flags(node->flags);
1664  }
1665 
1666  req_exec_quorum_nodeinfo.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO);
1667  req_exec_quorum_nodeinfo.header.size = sizeof(req_exec_quorum_nodeinfo);
1668 
1669  iov[0].iov_base = (void *)&req_exec_quorum_nodeinfo;
1670  iov[0].iov_len = sizeof(req_exec_quorum_nodeinfo);
1671 
1672  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1673 
1674  LEAVE();
1675  return ret;
1676 }
1677 
1678 static int votequorum_exec_send_qdevice_reconfigure(const char *oldname, const char *newname)
1679 {
1680  struct req_exec_quorum_qdevice_reconfigure req_exec_quorum_qdevice_reconfigure;
1681  struct iovec iov[1];
1682  int ret;
1683 
1684  ENTER();
1685 
1686  req_exec_quorum_qdevice_reconfigure.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE);
1687  req_exec_quorum_qdevice_reconfigure.header.size = sizeof(req_exec_quorum_qdevice_reconfigure);
1688  strcpy(req_exec_quorum_qdevice_reconfigure.oldname, oldname);
1689  strcpy(req_exec_quorum_qdevice_reconfigure.newname, newname);
1690 
1691  iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reconfigure;
1692  iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reconfigure);
1693 
1694  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1695 
1696  LEAVE();
1697  return ret;
1698 }
1699 
1700 static int votequorum_exec_send_qdevice_reg(uint32_t operation, const char *qdevice_name_req)
1701 {
1702  struct req_exec_quorum_qdevice_reg req_exec_quorum_qdevice_reg;
1703  struct iovec iov[1];
1704  int ret;
1705 
1706  ENTER();
1707 
1708  req_exec_quorum_qdevice_reg.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG);
1709  req_exec_quorum_qdevice_reg.header.size = sizeof(req_exec_quorum_qdevice_reg);
1710  req_exec_quorum_qdevice_reg.operation = operation;
1711  strcpy(req_exec_quorum_qdevice_reg.qdevice_name, qdevice_name_req);
1712 
1713  iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reg;
1714  iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reg);
1715 
1716  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1717 
1718  LEAVE();
1719  return ret;
1720 }
1721 
/*
 * Build a quorum notification carrying cluster_is_quorate and the id and
 * state of every entry in cluster_members_list, then deliver it through
 * ipc_dispatch_send().
 *
 * conn    - when non-NULL the notification is sent to this connection
 *           only; when NULL it is sent to every entry of trackers_list
 * context - context value placed in the message when conn is non-NULL;
 *           for tracker delivery each tracker's own tracking_context is
 *           used instead
 *
 * Returns the result of ipc_dispatch_send() when conn is non-NULL, and 0
 * otherwise (results of the per-tracker sends are not checked).
 */
1722 static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context)
1723 {
1724  struct res_lib_votequorum_quorum_notification *res_lib_votequorum_notification;
1725  struct list_head *tmp;
1726  struct cluster_node *node;
1727  int i = 0;
1728  int cluster_members = 0;
1729  int size;
 /* the message is assembled on the stack, with room for PROCESSOR_COUNT_MAX + 2 node entries */
1730  char buf[sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * (PROCESSOR_COUNT_MAX + 2)];
1731 
1732  ENTER();
1733 
1734  log_printf(LOGSYS_LEVEL_DEBUG, "Sending quorum callback, quorate = %d", cluster_is_quorate);
1735 
 /* first pass: count all list entries, whatever their state */
1736  list_iterate(tmp, &cluster_members_list) {
1737  node = list_entry(tmp, struct cluster_node, list);
1738  cluster_members++;
1739  }
 /* NOTE(review): listing line 1740, which opens the block closed on line 1742, is absent from this listing */
1741  cluster_members++;
1742  }
1743 
 /* NOTE(review): cluster_members is not checked against the capacity of buf here - confirm the list cannot grow beyond it */
1744  size = sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * cluster_members;
1745 
1746  res_lib_votequorum_notification = (struct res_lib_votequorum_quorum_notification *)&buf;
1747  res_lib_votequorum_notification->quorate = cluster_is_quorate;
1748  res_lib_votequorum_notification->context = context;
1749  res_lib_votequorum_notification->node_list_entries = cluster_members;
1750  res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_QUORUM_NOTIFICATION;
1751  res_lib_votequorum_notification->header.size = size;
1752  res_lib_votequorum_notification->header.error = CS_OK;
1753 
1754  /* Send all known nodes and their states */
1755  list_iterate(tmp, &cluster_members_list) {
1756  node = list_entry(tmp, struct cluster_node, list);
1757  res_lib_votequorum_notification->node_list[i].nodeid = node->node_id;
1758  res_lib_votequorum_notification->node_list[i++].state = node->state;
1759  }
 /* NOTE(review): listing line 1760, which opens the block closed on line 1763, is absent from this listing */
1761  res_lib_votequorum_notification->node_list[i].nodeid = VOTEQUORUM_QDEVICE_NODEID;
1762  res_lib_votequorum_notification->node_list[i++].state = qdevice->state;
1763  }
1764 
1765  /* Send it to all interested parties */
1766  if (conn) {
1767  int ret = corosync_api->ipc_dispatch_send(conn, &buf, size);
1768  LEAVE();
1769  return ret;
1770  } else {
1771  struct quorum_pd *qpd;
1772 
1773  list_iterate(tmp, &trackers_list) {
1774  qpd = list_entry(tmp, struct quorum_pd, list);
 /* the same buffer is reused for every tracker; only the context field changes */
1775  res_lib_votequorum_notification->context = qpd->tracking_context;
1776  corosync_api->ipc_dispatch_send(qpd->conn, &buf, size);
1777  }
1778  }
1779 
1780  LEAVE();
1781 
1782  return 0;
1783 }
1784 
1785 static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context)
1786 {
1787  struct res_lib_votequorum_nodelist_notification *res_lib_votequorum_notification;
1788  int i = 0;
1789  int size;
1790  struct list_head *tmp;
1791  char buf[sizeof(struct res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries];
1792 
1793  ENTER();
1794 
1795  log_printf(LOGSYS_LEVEL_DEBUG, "Sending nodelist callback. ring_id = %d/%lld", quorum_ringid.rep.nodeid, quorum_ringid.seq);
1796 
1797  size = sizeof(struct res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries;
1798 
1799  res_lib_votequorum_notification = (struct res_lib_votequorum_nodelist_notification *)&buf;
1800  res_lib_votequorum_notification->node_list_entries = quorum_members_entries;
1801  res_lib_votequorum_notification->ring_id.nodeid = quorum_ringid.rep.nodeid;
1802  res_lib_votequorum_notification->ring_id.seq = quorum_ringid.seq;
1803  res_lib_votequorum_notification->context = context;
1804 
1805  for (i=0; i<quorum_members_entries; i++) {
1806  res_lib_votequorum_notification->node_list[i] = quorum_members[i];
1807  }
1808 
1809  res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_NODELIST_NOTIFICATION;
1810  res_lib_votequorum_notification->header.size = size;
1811  res_lib_votequorum_notification->header.error = CS_OK;
1812 
1813  /* Send it to all interested parties */
1814  if (conn) {
1815  int ret = corosync_api->ipc_dispatch_send(conn, &buf, size);
1816  LEAVE();
1817  return ret;
1818  } else {
1819  struct quorum_pd *qpd;
1820 
1821  list_iterate(tmp, &trackers_list) {
1822  qpd = list_entry(tmp, struct quorum_pd, list);
1823  res_lib_votequorum_notification->context = qpd->tracking_context;
1824  corosync_api->ipc_dispatch_send(qpd->conn, &buf, size);
1825  }
1826  }
1827 
1828  LEAVE();
1829 
1830  return 0;
1831 }
1832 
1833 static void votequorum_exec_send_expectedvotes_notification(void)
1834 {
1835  struct res_lib_votequorum_expectedvotes_notification res_lib_votequorum_expectedvotes_notification;
1836  struct quorum_pd *qpd;
1837  struct list_head *tmp;
1838 
1839  ENTER();
1840 
1841  log_printf(LOGSYS_LEVEL_DEBUG, "Sending expected votes callback");
1842 
1843  res_lib_votequorum_expectedvotes_notification.header.id = MESSAGE_RES_VOTEQUORUM_EXPECTEDVOTES_NOTIFICATION;
1844  res_lib_votequorum_expectedvotes_notification.header.size = sizeof(res_lib_votequorum_expectedvotes_notification);
1845  res_lib_votequorum_expectedvotes_notification.header.error = CS_OK;
1846  res_lib_votequorum_expectedvotes_notification.expected_votes = us->expected_votes;
1847 
1848  list_iterate(tmp, &trackers_list) {
1849  qpd = list_entry(tmp, struct quorum_pd, list);
1850  res_lib_votequorum_expectedvotes_notification.context = qpd->tracking_context;
1851  corosync_api->ipc_dispatch_send(qpd->conn, &res_lib_votequorum_expectedvotes_notification,
1852  sizeof(struct res_lib_votequorum_expectedvotes_notification));
1853  }
1854 
1855  LEAVE();
1856 }
1857 
/*
 * Endian conversion hook for qdevice reconfigure messages.
 * No field of the message is converted here.
 */
static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message)
{
	(void)message;

	ENTER();
	LEAVE();
}
1864 
/*
 * Handle a qdevice rename request received from the cluster.
 *
 * When the old name carried by the message equals the locally stored
 * qdevice_name, qdevice_name is cleared and replaced by the new name
 * from the message; otherwise the request is ignored.
 *
 * message - the received request
 * nodeid  - id of the sending node (only used for logging)
 */
1865 static void message_handler_req_exec_votequorum_qdevice_reconfigure (
1866  const void *message,
1867  unsigned int nodeid)
1868 {
 /* NOTE(review): listing line 1869, presumably the declaration of req_exec_quorum_qdevice_reconfigure, is absent from this listing */
1870 
1871  ENTER();
1872 
1873  log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice name change req from node %u [from: %s to: %s]",
1874  nodeid,
1875  req_exec_quorum_qdevice_reconfigure->oldname,
1876  req_exec_quorum_qdevice_reconfigure->newname);
1877 
1878  if (!strcmp(req_exec_quorum_qdevice_reconfigure->oldname, qdevice_name)) {
1879  log_printf(LOGSYS_LEVEL_DEBUG, "Allowing qdevice rename");
1880  memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
 /* NOTE(review): unbounded copy of a name taken from a received message - assumes it is NUL terminated and fits qdevice_name; confirm */
1881  strcpy(qdevice_name, req_exec_quorum_qdevice_reconfigure->newname);
1882  /*
1883  * TODO: notify qdevices about name change?
1884  * this is not relevant for now and can wait later on since
1885  * qdevices are local only and libvotequorum is not final
1886  */
1887  }
1888 
1889  LEAVE();
1890 }
1891 
/*
 * Endian conversion hook for qdevice registration messages: byte-swaps
 * the 32 bit operation field in place.
 */
1892 static void exec_votequorum_qdevice_reg_endian_convert (void *message)
1893 {
 /* NOTE(review): listing line 1894, presumably the declaration of req_exec_quorum_qdevice_reg, is absent from this listing */
1895 
1896  ENTER();
1897 
1898  req_exec_quorum_qdevice_reg->operation = swab32(req_exec_quorum_qdevice_reg->operation);
1899 
1900  LEAVE();
1901 }
1902 
/*
 * Handle a qdevice registration / unregistration message received from
 * the cluster.
 *
 * For the first operation: a message from another node only records the
 * device name when none is stored yet. A message from this node is
 * answered on qdevice_reg_conn with CS_OK when the requested name equals
 * the stored name and with CS_ERR_EXIST otherwise; qdevice_reg_conn is
 * reset to NULL afterwards.
 *
 * For the second operation: the stored device name is wiped unless the
 * member scan below clears wipe_qdevice_name.
 *
 * NOTE(review): several lines of this function (the message pointer
 * declaration, both case labels and a few conditions) are absent from
 * this listing; the description above covers the visible statements
 * only.
 *
 * message - the received request
 * nodeid  - id of the sending node
 */
1903 static void message_handler_req_exec_votequorum_qdevice_reg (
1904  const void *message,
1905  unsigned int nodeid)
1906 {
1908  struct res_lib_votequorum_status res_lib_votequorum_status;
1909  int wipe_qdevice_name = 1;
1910  struct cluster_node *node = NULL;
1911  struct list_head *tmp;
1912  cs_error_t error = CS_OK;
1913 
1914  ENTER();
1915 
1916  log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice op %u req from node %u [%s]",
1917  req_exec_quorum_qdevice_reg->operation,
1918  nodeid, req_exec_quorum_qdevice_reg->qdevice_name);
1919 
1920  switch(req_exec_quorum_qdevice_reg->operation)
1921  {
 /* request sent by another node: only remember the device name if none is known yet */
1923  if (nodeid != us->node_id) {
1924  if (!strlen(qdevice_name)) {
1925  log_printf(LOGSYS_LEVEL_DEBUG, "Remote qdevice name recorded");
1926  strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name);
1927  }
1928  LEAVE();
1929  return;
1930  }
1931 
1932  /*
1933  * protect against the case where we broadcast qdevice registration
1934  * to new members, we receive the message back, but there is no registration
1935  * connection in progress
1936  */
1938  LEAVE();
1939  return;
1940  }
1941 
1942  /*
1943  * this should NEVER happen
1944  */
1945  if (!qdevice_reg_conn) {
1946  log_printf(LOGSYS_LEVEL_WARNING, "Unable to determine origin of the qdevice register call!");
1947  LEAVE();
1948  return;
1949  }
1950 
1951  /*
1952  * registering our own device in this case
1953  */
1954  if (!strlen(qdevice_name)) {
1955  strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name);
1956  }
1957 
1958  /*
1959  * check if it is our device or something else
1960  */
1961  if ((!strncmp(req_exec_quorum_qdevice_reg->qdevice_name,
1962  qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) {
1964  votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
1965  votequorum_exec_send_nodeinfo(us->node_id);
1966  } else {
1968  "A new qdevice with different name (new: %s old: %s) is trying to register!",
1969  req_exec_quorum_qdevice_reg->qdevice_name, qdevice_name);
1970  error = CS_ERR_EXIST;
1971  }
1972 
 /* answer the pending registration request and forget the connection */
1973  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
1974  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
1975  res_lib_votequorum_status.header.error = error;
1976  corosync_api->ipc_response_send(qdevice_reg_conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
1977  qdevice_reg_conn = NULL;
1978  break;
 /* scan the known nodes; a match on the (partly missing) condition below keeps the stored name */
1980  list_iterate(tmp, &cluster_members_list) {
1981  node = list_entry(tmp, struct cluster_node, list);
1982  if ((node->state == NODESTATE_MEMBER) &&
1984  wipe_qdevice_name = 0;
1985  }
1986  }
1987 
1988  if (wipe_qdevice_name) {
1989  memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
1990  }
1991 
1992  break;
1993  }
1994  LEAVE();
1995 }
1996 
1997 static void exec_votequorum_nodeinfo_endian_convert (void *message)
1998 {
1999  struct req_exec_quorum_nodeinfo *nodeinfo = message;
2000 
2001  ENTER();
2002 
2003  nodeinfo->nodeid = swab32(nodeinfo->nodeid);
2004  nodeinfo->votes = swab32(nodeinfo->votes);
2005  nodeinfo->expected_votes = swab32(nodeinfo->expected_votes);
2006  nodeinfo->flags = swab32(nodeinfo->flags);
2007 
2008  LEAVE();
2009 }
2010 
2011 static void message_handler_req_exec_votequorum_nodeinfo (
2012  const void *message,
2013  unsigned int sender_nodeid)
2014 {
2015  const struct req_exec_quorum_nodeinfo *req_exec_quorum_nodeinfo = message;
2016  struct cluster_node *node = NULL;
2017  int old_votes;
2018  int old_expected;
2019  uint32_t old_flags;
2020  nodestate_t old_state;
2021  int new_node = 0;
2022  int allow_downgrade = 0;
2023  int by_node = 0;
2024  unsigned int nodeid = req_exec_quorum_nodeinfo->nodeid;
2025 
2026  ENTER();
2027 
2028  log_printf(LOGSYS_LEVEL_DEBUG, "got nodeinfo message from cluster node %u", sender_nodeid);
2029  log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message[%u]: votes: %d, expected: %d flags: %d",
2030  nodeid,
2031  req_exec_quorum_nodeinfo->votes,
2032  req_exec_quorum_nodeinfo->expected_votes,
2033  req_exec_quorum_nodeinfo->flags);
2034 
2035  if (nodeid != VOTEQUORUM_QDEVICE_NODEID) {
2036  decode_flags(req_exec_quorum_nodeinfo->flags);
2037  }
2038 
2039  node = find_node_by_nodeid(nodeid);
2040  if (!node) {
2041  node = allocate_node(nodeid);
2042  new_node = 1;
2043  }
2044  if (!node) {
2045  corosync_api->error_memory_failure();
2046  LEAVE();
2047  return;
2048  }
2049 
2050  if (new_node) {
2051  old_votes = 0;
2052  old_expected = 0;
2053  old_state = NODESTATE_DEAD;
2054  old_flags = 0;
2055  } else {
2056  old_votes = node->votes;
2057  old_expected = node->expected_votes;
2058  old_state = node->state;
2059  old_flags = node->flags;
2060  }
2061 
2062  if (nodeid == VOTEQUORUM_QDEVICE_NODEID) {
2063  struct cluster_node *sender_node = find_node_by_nodeid(sender_nodeid);
2064 
2065  assert(sender_node != NULL);
2066 
2067  if ((!cluster_is_quorate) &&
2068  (sender_node->flags & NODE_FLAGS_QUORATE)) {
2069  node->votes = req_exec_quorum_nodeinfo->votes;
2070  } else {
2071  node->votes = max(node->votes, req_exec_quorum_nodeinfo->votes);
2072  }
2073  goto recalculate;
2074  }
2075 
2076  /* Update node state */
2077  node->flags = req_exec_quorum_nodeinfo->flags;
2078  node->votes = req_exec_quorum_nodeinfo->votes;
2079  node->state = NODESTATE_MEMBER;
2080 
2081  if (node->flags & NODE_FLAGS_LEAVING) {
2082  node->state = NODESTATE_LEAVING;
2083  allow_downgrade = 1;
2084  by_node = 1;
2085  }
2086 
2087  if ((!cluster_is_quorate) &&
2088  (node->flags & NODE_FLAGS_QUORATE)) {
2089  allow_downgrade = 1;
2090  us->expected_votes = req_exec_quorum_nodeinfo->expected_votes;
2091  }
2092 
2093  if (node->flags & NODE_FLAGS_QUORATE || (ev_tracking)) {
2094  node->expected_votes = req_exec_quorum_nodeinfo->expected_votes;
2095  } else {
2096  node->expected_votes = us->expected_votes;
2097  }
2098 
2099  if ((last_man_standing) && (node->votes > 1)) {
2100  log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all"
2101  "cluster nodes votes are set to 1. Disabling LMS.");
2102  last_man_standing = 0;
2103  if (last_man_standing_timer_set) {
2104  corosync_api->timer_delete(last_man_standing_timer);
2105  last_man_standing_timer_set = 0;
2106  }
2107  }
2108 
2109 recalculate:
2110  if ((new_node) ||
2111  (nodeid == us->node_id) ||
2112  (node->flags & NODE_FLAGS_FIRST) ||
2113  (old_votes != node->votes) ||
2114  (old_expected != node->expected_votes) ||
2115  (old_flags != node->flags) ||
2116  (old_state != node->state)) {
2117  recalculate_quorum(allow_downgrade, by_node);
2118  }
2119 
2120  if ((wait_for_all) &&
2121  (!(node->flags & NODE_FLAGS_WFASTATUS)) &&
2122  (node->flags & NODE_FLAGS_QUORATE)) {
2123  update_wait_for_all_status(0);
2124  }
2125 
2126  LEAVE();
2127 }
2128 
2129 static void exec_votequorum_reconfigure_endian_convert (void *message)
2130 {
2131  struct req_exec_quorum_reconfigure *reconfigure = message;
2132 
2133  ENTER();
2134 
2135  reconfigure->nodeid = swab32(reconfigure->nodeid);
2136  reconfigure->value = swab32(reconfigure->value);
2137 
2138  LEAVE();
2139 }
2140 
2141 static void message_handler_req_exec_votequorum_reconfigure (
2142  const void *message,
2143  unsigned int nodeid)
2144 {
2146  struct cluster_node *node;
2147  struct list_head *nodelist;
2148 
2149  ENTER();
2150 
2151  log_printf(LOGSYS_LEVEL_DEBUG, "got reconfigure message from cluster node %u for %u",
2152  nodeid, req_exec_quorum_reconfigure->nodeid);
2153 
2154  switch(req_exec_quorum_reconfigure->param)
2155  {
2157  list_iterate(nodelist, &cluster_members_list) {
2158  node = list_entry(nodelist, struct cluster_node, list);
2159  if (node->state == NODESTATE_MEMBER) {
2160  node->expected_votes = req_exec_quorum_reconfigure->value;
2161  }
2162  }
2163  votequorum_exec_send_expectedvotes_notification();
2164  update_ev_barrier(req_exec_quorum_reconfigure->value);
2165  if (ev_tracking) {
2166  us->expected_votes = max(us->expected_votes, ev_tracking_barrier);
2167  }
2168  recalculate_quorum(1, 0); /* Allow decrease */
2169  break;
2170 
2172  node = find_node_by_nodeid(req_exec_quorum_reconfigure->nodeid);
2173  if (!node) {
2174  LEAVE();
2175  return;
2176  }
2177  node->votes = req_exec_quorum_reconfigure->value;
2178  recalculate_quorum(1, 0); /* Allow decrease */
2179  break;
2180 
2182  update_wait_for_all_status(0);
2183  log_printf(LOGSYS_LEVEL_INFO, "wait_for_all_status reset by user on node %d.",
2184  req_exec_quorum_reconfigure->nodeid);
2185  recalculate_quorum(0, 0);
2186 
2187  break;
2188 
2189  }
2190 
2191  LEAVE();
2192 }
2193 
2194 static int votequorum_exec_exit_fn (void)
2195 {
2196  int ret = 0;
2197 
2198  ENTER();
2199 
2200  /*
2201  * tell the other nodes we are leaving
2202  */
2203 
2204  if (allow_downscale) {
2205  us->flags |= NODE_FLAGS_LEAVING;
2206  ret = votequorum_exec_send_nodeinfo(us->node_id);
2207  }
2208 
2209  if ((ev_tracking) && (ev_tracking_fd != -1)) {
2210  close(ev_tracking_fd);
2211  }
2212 
2213 
2214  LEAVE();
2215  return ret;
2216 }
2217 
2218 static void votequorum_set_icmap_ro_keys(void)
2219 {
2220  icmap_set_ro_access("quorum.allow_downscale", CS_FALSE, CS_TRUE);
2221  icmap_set_ro_access("quorum.wait_for_all", CS_FALSE, CS_TRUE);
2222  icmap_set_ro_access("quorum.last_man_standing", CS_FALSE, CS_TRUE);
2223  icmap_set_ro_access("quorum.last_man_standing_window", CS_FALSE, CS_TRUE);
2224  icmap_set_ro_access("quorum.expected_votes_tracking", CS_FALSE, CS_TRUE);
2225  icmap_set_ro_access("quorum.auto_tie_breaker", CS_FALSE, CS_TRUE);
2226  icmap_set_ro_access("quorum.auto_tie_breaker_node", CS_FALSE, CS_TRUE);
2227 }
2228 
2229 static char *votequorum_exec_init_fn (struct corosync_api_v1 *api)
2230 {
2231  char *error = NULL;
2232 
2233  ENTER();
2234 
2235  /*
2236  * make sure we start clean
2237  */
2238  list_init(&cluster_members_list);
2239  list_init(&trackers_list);
2240  qdevice = NULL;
2241  us = NULL;
2242  memset(cluster_nodes, 0, sizeof(cluster_nodes));
2243 
2244  /*
2245  * Allocate a cluster_node for qdevice
2246  */
2247  qdevice = allocate_node(VOTEQUORUM_QDEVICE_NODEID);
2248  if (!qdevice) {
2249  LEAVE();
2250  return ((char *)"Could not allocate node.");
2251  }
2252  qdevice->votes = 0;
2253  memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
2254 
2255  /*
2256  * Allocate a cluster_node for us
2257  */
2258  us = allocate_node(corosync_api->totem_nodeid_get());
2259  if (!us) {
2260  LEAVE();
2261  return ((char *)"Could not allocate node.");
2262  }
2263 
2264  icmap_set_uint32("runtime.votequorum.this_node_id", us->node_id);
2265 
2266  us->state = NODESTATE_MEMBER;
2267  us->votes = 1;
2268  us->flags |= NODE_FLAGS_FIRST;
2269 
2270  error = votequorum_readconfig(VOTEQUORUM_READCONFIG_STARTUP);
2271  if (error) {
2272  return error;
2273  }
2274  recalculate_quorum(0, 0);
2275 
2276  /*
2277  * Set RO keys in icmap
2278  */
2279  votequorum_set_icmap_ro_keys();
2280 
2281  /*
2282  * Listen for changes
2283  */
2284  votequorum_exec_add_config_notification();
2285 
2286  /*
2287  * Start us off with one node
2288  */
2289  votequorum_exec_send_nodeinfo(us->node_id);
2290 
2291  LEAVE();
2292 
2293  return (NULL);
2294 }
2295 
2296 /*
2297  * votequorum service core
2298  */
2299 
2300 static void votequorum_last_man_standing_timer_fn(void *arg)
2301 {
2302  ENTER();
2303 
2304  last_man_standing_timer_set = 0;
2305  if (cluster_is_quorate) {
2306  recalculate_quorum(1,1);
2307  }
2308 
2309  LEAVE();
2310 }
2311 
2312 static void votequorum_sync_init (
2313  const unsigned int *trans_list, size_t trans_list_entries,
2314  const unsigned int *member_list, size_t member_list_entries,
2315  const struct memb_ring_id *ring_id)
2316 {
2317  int i, j;
2318  int found;
2319  int left_nodes;
2320  struct cluster_node *node;
2321 
2322  ENTER();
2323 
2324  sync_in_progress = 1;
2325  sync_nodeinfo_sent = 0;
2326  sync_wait_for_poll_or_timeout = 0;
2327 
2328  if (member_list_entries > 1) {
2329  us->flags &= ~NODE_FLAGS_FIRST;
2330  }
2331 
2332  /*
2333  * we don't need to track which nodes have left directly,
2334  * since that info is in the node db, but we need to know
2335  * if somebody has left for last_man_standing
2336  */
2337  left_nodes = 0;
2338  for (i = 0; i < quorum_members_entries; i++) {
2339  found = 0;
2340  for (j = 0; j < member_list_entries; j++) {
2341  if (quorum_members[i] == member_list[j]) {
2342  found = 1;
2343  break;
2344  }
2345  }
2346  if (found == 0) {
2347  left_nodes = 1;
2348  node = find_node_by_nodeid(quorum_members[i]);
2349  if (node) {
2350  node->state = NODESTATE_DEAD;
2351  }
2352  }
2353  }
2354 
2355  if (last_man_standing) {
2356  if (((member_list_entries >= quorum) && (left_nodes)) ||
2357  ((member_list_entries <= quorum) && (auto_tie_breaker != ATB_NONE) && (check_low_node_id_partition() == 1))) {
2358  if (last_man_standing_timer_set) {
2359  corosync_api->timer_delete(last_man_standing_timer);
2360  last_man_standing_timer_set = 0;
2361  }
2362  corosync_api->timer_add_duration((unsigned long long)last_man_standing_window*1000000,
2363  NULL, votequorum_last_man_standing_timer_fn,
2364  &last_man_standing_timer);
2365  last_man_standing_timer_set = 1;
2366  }
2367  }
2368 
2369  memcpy(previous_quorum_members, quorum_members, sizeof(unsigned int) * quorum_members_entries);
2370  previous_quorum_members_entries = quorum_members_entries;
2371 
2372  memcpy(quorum_members, member_list, sizeof(unsigned int) * member_list_entries);
2373  quorum_members_entries = member_list_entries;
2374  memcpy(&quorum_ringid, ring_id, sizeof(*ring_id));
2375 
2377  /*
2378  * Reset poll timer. Sync waiting is interrupted on valid qdevice poll or after timeout
2379  */
2380  if (qdevice_timer_set) {
2381  corosync_api->timer_delete(qdevice_timer);
2382  }
2383  corosync_api->timer_add_duration((unsigned long long)qdevice_sync_timeout*1000000, qdevice,
2384  qdevice_timer_fn, &qdevice_timer);
2385  qdevice_timer_set = 1;
2386  sync_wait_for_poll_or_timeout = 1;
2387 
2388  log_printf(LOGSYS_LEVEL_INFO, "waiting for quorum device %s poll (but maximum for %u ms)",
2389  qdevice_name, qdevice_sync_timeout);
2390  }
2391 
2392  LEAVE();
2393 }
2394 
2395 static int votequorum_sync_process (void)
2396 {
2397  if (!sync_nodeinfo_sent) {
2398  votequorum_exec_send_nodeinfo(us->node_id);
2399  votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
2400  if (strlen(qdevice_name)) {
2401  votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER,
2402  qdevice_name);
2403  }
2404  votequorum_exec_send_nodelist_notification(NULL, 0LL);
2405  sync_nodeinfo_sent = 1;
2406  }
2407 
2408  if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED && sync_wait_for_poll_or_timeout) {
2409  /*
2410  * Waiting for qdevice to poll with new ringid or timeout
2411  */
2412 
2413  return (-1);
2414  }
2415 
2416  return 0;
2417 }
2418 
2419 static void votequorum_sync_activate (void)
2420 {
2421  recalculate_quorum(0, 0);
2422  quorum_callback(quorum_members, quorum_members_entries,
2423  cluster_is_quorate, &quorum_ringid);
2424  votequorum_exec_send_quorum_notification(NULL, 0L);
2425 
2426  sync_in_progress = 0;
2427 }
2428 
/*
 * Sync engine abort hook. Intentionally does nothing.
 */
static void votequorum_sync_abort (void)
{
	/* no state to roll back */
}
2433 
2435  quorum_set_quorate_fn_t q_set_quorate_fn)
2436 {
2437  char *error;
2438 
2439  ENTER();
2440 
2441  if (q_set_quorate_fn == NULL) {
2442  return ((char *)"Quorate function not set");
2443  }
2444 
2445  corosync_api = api;
2446  quorum_callback = q_set_quorate_fn;
2447 
2448  error = corosync_service_link_and_init(corosync_api,
2449  &votequorum_service[0]);
2450  if (error) {
2451  return (error);
2452  }
2453 
2454  LEAVE();
2455 
2456  return (NULL);
2457 }
2458 
2459 /*
2460  * Library Handler init/fini
2461  */
2462 
2463 static int quorum_lib_init_fn (void *conn)
2464 {
2465  struct quorum_pd *pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2466 
2467  ENTER();
2468 
2469  list_init (&pd->list);
2470  pd->conn = conn;
2471 
2472  LEAVE();
2473  return (0);
2474 }
2475 
2476 static int quorum_lib_exit_fn (void *conn)
2477 {
2478  struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2479 
2480  ENTER();
2481 
2482  if (quorum_pd->tracking_enabled) {
2483  list_del (&quorum_pd->list);
2484  list_init (&quorum_pd->list);
2485  }
2486 
2487  LEAVE();
2488 
2489  return (0);
2490 }
2491 
2492 /*
2493  * library internal functions
2494  */
2495 
2496 static void qdevice_timer_fn(void *arg)
2497 {
2498  ENTER();
2499 
2500  if ((!(us->flags & NODE_FLAGS_QDEVICE_ALIVE)) ||
2501  (!qdevice_timer_set)) {
2502  LEAVE();
2503  return;
2504  }
2505 
2508  log_printf(LOGSYS_LEVEL_INFO, "lost contact with quorum device %s", qdevice_name);
2509  votequorum_exec_send_nodeinfo(us->node_id);
2510 
2511  qdevice_timer_set = 0;
2512  sync_wait_for_poll_or_timeout = 0;
2513 
2514  LEAVE();
2515 }
2516 
2517 /*
2518  * Library Handler Functions
2519  */
2520 
2521 static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *message)
2522 {
2524  struct res_lib_votequorum_getinfo res_lib_votequorum_getinfo;
2525  struct cluster_node *node;
2526  unsigned int highest_expected = 0;
2527  unsigned int total_votes = 0;
2528  cs_error_t error = CS_OK;
2529  uint32_t nodeid = req_lib_votequorum_getinfo->nodeid;
2530 
2531  ENTER();
2532 
2533  log_printf(LOGSYS_LEVEL_DEBUG, "got getinfo request on %p for node %u", conn, req_lib_votequorum_getinfo->nodeid);
2534 
2535  if (nodeid == VOTEQUORUM_QDEVICE_NODEID) {
2536  nodeid = us->node_id;
2537  }
2538 
2539  node = find_node_by_nodeid(nodeid);
2540  if (node) {
2541  struct cluster_node *iternode;
2542  struct list_head *nodelist;
2543 
2544  list_iterate(nodelist, &cluster_members_list) {
2545  iternode = list_entry(nodelist, struct cluster_node, list);
2546 
2547  if (iternode->state == NODESTATE_MEMBER) {
2548  highest_expected =
2549  max(highest_expected, iternode->expected_votes);
2550  total_votes += iternode->votes;
2551  }
2552  }
2553 
2554  if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
2555  total_votes += qdevice->votes;
2556  }
2557 
2558  switch(node->state) {
2559  case NODESTATE_MEMBER:
2560  res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_MEMBER;
2561  break;
2562  case NODESTATE_DEAD:
2563  res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_DEAD;
2564  break;
2565  case NODESTATE_LEAVING:
2566  res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_LEAVING;
2567  break;
2568  default:
2569  res_lib_votequorum_getinfo.state = node->state;
2570  break;
2571  }
2572  res_lib_votequorum_getinfo.state = node->state;
2573  res_lib_votequorum_getinfo.votes = node->votes;
2574  res_lib_votequorum_getinfo.expected_votes = node->expected_votes;
2575  res_lib_votequorum_getinfo.highest_expected = highest_expected;
2576 
2577  res_lib_votequorum_getinfo.quorum = quorum;
2578  res_lib_votequorum_getinfo.total_votes = total_votes;
2579  res_lib_votequorum_getinfo.flags = 0;
2580  res_lib_votequorum_getinfo.nodeid = node->node_id;
2581 
2582  if (two_node) {
2583  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_TWONODE;
2584  }
2585  if (cluster_is_quorate) {
2586  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QUORATE;
2587  }
2588  if (wait_for_all) {
2589  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_WAIT_FOR_ALL;
2590  }
2591  if (last_man_standing) {
2592  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_LAST_MAN_STANDING;
2593  }
2594  if (auto_tie_breaker != ATB_NONE) {
2595  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_AUTO_TIE_BREAKER;
2596  }
2597  if (allow_downscale) {
2598  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_ALLOW_DOWNSCALE;
2599  }
2600 
2601  memset(res_lib_votequorum_getinfo.qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
2602  strcpy(res_lib_votequorum_getinfo.qdevice_name, qdevice_name);
2603  res_lib_votequorum_getinfo.qdevice_votes = qdevice->votes;
2604 
2605  if (node->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
2606  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_REGISTERED;
2607  }
2608  if (node->flags & NODE_FLAGS_QDEVICE_ALIVE) {
2609  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_ALIVE;
2610  }
2611  if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
2612  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_CAST_VOTE;
2613  }
2614  if (node->flags & NODE_FLAGS_QDEVICE_MASTER_WINS) {
2615  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_MASTER_WINS;
2616  }
2617  } else {
2618  error = CS_ERR_NOT_EXIST;
2619  }
2620 
2621  res_lib_votequorum_getinfo.header.size = sizeof(res_lib_votequorum_getinfo);
2622  res_lib_votequorum_getinfo.header.id = MESSAGE_RES_VOTEQUORUM_GETINFO;
2623  res_lib_votequorum_getinfo.header.error = error;
2624  corosync_api->ipc_response_send(conn, &res_lib_votequorum_getinfo, sizeof(res_lib_votequorum_getinfo));
2625  log_printf(LOGSYS_LEVEL_DEBUG, "getinfo response error: %d", error);
2626 
2627  LEAVE();
2628 }
2629 
2630 static void message_handler_req_lib_votequorum_setexpected (void *conn, const void *message)
2631 {
2633  struct res_lib_votequorum_status res_lib_votequorum_status;
2634  cs_error_t error = CS_OK;
2635  unsigned int newquorum;
2636  unsigned int total_votes;
2637  uint8_t allow_downscale_status = 0;
2638 
2639  ENTER();
2640 
2641  allow_downscale_status = allow_downscale;
2642  allow_downscale = 0;
2643 
2644  /*
2645  * Validate new expected votes
2646  */
2647  newquorum = calculate_quorum(1, req_lib_votequorum_setexpected->expected_votes, &total_votes);
2648  allow_downscale = allow_downscale_status;
2649  if (newquorum < total_votes / 2 ||
2650  newquorum > total_votes) {
2651  error = CS_ERR_INVALID_PARAM;
2652  goto error_exit;
2653  }
2654 
2655  votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES, us->node_id,
2656  req_lib_votequorum_setexpected->expected_votes);
2657 
2658 error_exit:
2659  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2660  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2661  res_lib_votequorum_status.header.error = error;
2662  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2663 
2664  LEAVE();
2665 }
2666 
2667 static void message_handler_req_lib_votequorum_setvotes (void *conn, const void *message)
2668 {
2670  struct res_lib_votequorum_status res_lib_votequorum_status;
2671  struct cluster_node *node;
2672  unsigned int newquorum;
2673  unsigned int total_votes;
2674  unsigned int saved_votes;
2675  cs_error_t error = CS_OK;
2676  unsigned int nodeid;
2677 
2678  ENTER();
2679 
2680  nodeid = req_lib_votequorum_setvotes->nodeid;
2681  node = find_node_by_nodeid(nodeid);
2682  if (!node) {
2683  error = CS_ERR_NAME_NOT_FOUND;
2684  goto error_exit;
2685  }
2686 
2687  /*
2688  * Check votes is valid
2689  */
2690  saved_votes = node->votes;
2691  node->votes = req_lib_votequorum_setvotes->votes;
2692 
2693  newquorum = calculate_quorum(1, 0, &total_votes);
2694 
2695  if (newquorum < total_votes / 2 ||
2696  newquorum > total_votes) {
2697  node->votes = saved_votes;
2698  error = CS_ERR_INVALID_PARAM;
2699  goto error_exit;
2700  }
2701 
2702  votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES, nodeid,
2703  req_lib_votequorum_setvotes->votes);
2704 
2705 error_exit:
2706  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2707  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2708  res_lib_votequorum_status.header.error = error;
2709  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2710 
2711  LEAVE();
2712 }
2713 
2714 static void message_handler_req_lib_votequorum_trackstart (void *conn,
2715  const void *message)
2716 {
2718  struct res_lib_votequorum_status res_lib_votequorum_status;
2719  struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2720  cs_error_t error = CS_OK;
2721 
2722  ENTER();
2723 
2724  /*
2725  * If an immediate listing of the current cluster membership
2726  * is requested, generate membership list
2727  */
2728  if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CURRENT ||
2729  req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES) {
2730  log_printf(LOGSYS_LEVEL_DEBUG, "sending initial status to %p", conn);
2731  votequorum_exec_send_nodelist_notification(conn, req_lib_votequorum_trackstart->context);
2732  votequorum_exec_send_quorum_notification(conn, req_lib_votequorum_trackstart->context);
2733  }
2734 
2735  if (quorum_pd->tracking_enabled) {
2736  error = CS_ERR_EXIST;
2737  goto response_send;
2738  }
2739 
2740  /*
2741  * Record requests for tracking
2742  */
2743  if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES ||
2744  req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES_ONLY) {
2745 
2746  quorum_pd->track_flags = req_lib_votequorum_trackstart->track_flags;
2747  quorum_pd->tracking_enabled = 1;
2748  quorum_pd->tracking_context = req_lib_votequorum_trackstart->context;
2749 
2750  list_add (&quorum_pd->list, &trackers_list);
2751  }
2752 
2753 response_send:
2754  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2755  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2756  res_lib_votequorum_status.header.error = error;
2757  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2758 
2759  LEAVE();
2760 }
2761 
2762 static void message_handler_req_lib_votequorum_trackstop (void *conn,
2763  const void *message)
2764 {
2765  struct res_lib_votequorum_status res_lib_votequorum_status;
2766  struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2767  int error = CS_OK;
2768 
2769  ENTER();
2770 
2771  if (quorum_pd->tracking_enabled) {
2772  error = CS_OK;
2773  quorum_pd->tracking_enabled = 0;
2774  list_del (&quorum_pd->list);
2775  list_init (&quorum_pd->list);
2776  } else {
2777  error = CS_ERR_NOT_EXIST;
2778  }
2779 
2780  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2781  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2782  res_lib_votequorum_status.header.error = error;
2783  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2784 
2785  LEAVE();
2786 }
2787 
2788 static void message_handler_req_lib_votequorum_qdevice_register (void *conn,
2789  const void *message)
2790 {
2792  struct res_lib_votequorum_status res_lib_votequorum_status;
2793  cs_error_t error = CS_OK;
2794 
2795  ENTER();
2796 
2797  if (!qdevice_can_operate) {
2798  log_printf(LOGSYS_LEVEL_INFO, "Registration of quorum device is disabled by incorrect corosync.conf. See logs for more information");
2799  error = CS_ERR_ACCESS;
2800  goto out;
2801  }
2802 
2804  if ((!strncmp(req_lib_votequorum_qdevice_register->name,
2805  qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) {
2806  goto out;
2807  } else {
2809  "A new qdevice with different name (new: %s old: %s) is trying to re-register!",
2810  req_lib_votequorum_qdevice_register->name, qdevice_name);
2811  error = CS_ERR_EXIST;
2812  goto out;
2813  }
2814  } else {
2815  if (qdevice_reg_conn != NULL) {
2817  "Registration request already in progress");
2818  error = CS_ERR_TRY_AGAIN;
2819  goto out;
2820  }
2821  qdevice_reg_conn = conn;
2822  if (votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER,
2823  req_lib_votequorum_qdevice_register->name) != 0) {
2825  "Unable to send qdevice registration request to cluster");
2826  error = CS_ERR_TRY_AGAIN;
2827  qdevice_reg_conn = NULL;
2828  } else {
2829  LEAVE();
2830  return;
2831  }
2832  }
2833 
2834 out:
2835 
2836  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2837  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2838  res_lib_votequorum_status.header.error = error;
2839  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2840 
2841  LEAVE();
2842 }
2843 
2844 static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn,
2845  const void *message)
2846 {
2848  struct res_lib_votequorum_status res_lib_votequorum_status;
2849  cs_error_t error = CS_OK;
2850 
2851  ENTER();
2852 
2854  if (strncmp(req_lib_votequorum_qdevice_unregister->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
2855  error = CS_ERR_INVALID_PARAM;
2856  goto out;
2857  }
2858  if (qdevice_timer_set) {
2859  corosync_api->timer_delete(qdevice_timer);
2860  qdevice_timer_set = 0;
2861  sync_wait_for_poll_or_timeout = 0;
2862  }
2867  votequorum_exec_send_nodeinfo(us->node_id);
2868  votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER,
2869  req_lib_votequorum_qdevice_unregister->name);
2870  } else {
2871  error = CS_ERR_NOT_EXIST;
2872  }
2873 
2874 out:
2875  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2876  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2877  res_lib_votequorum_status.header.error = error;
2878  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2879 
2880  LEAVE();
2881 }
2882 
2883 static void message_handler_req_lib_votequorum_qdevice_update (void *conn,
2884  const void *message)
2885 {
2887  struct res_lib_votequorum_status res_lib_votequorum_status;
2888  cs_error_t error = CS_OK;
2889 
2890  ENTER();
2891 
2893  if (strncmp(req_lib_votequorum_qdevice_update->oldname, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
2894  error = CS_ERR_INVALID_PARAM;
2895  goto out;
2896  }
2897  votequorum_exec_send_qdevice_reconfigure(req_lib_votequorum_qdevice_update->oldname,
2898  req_lib_votequorum_qdevice_update->newname);
2899  } else {
2900  error = CS_ERR_NOT_EXIST;
2901  }
2902 
2903 out:
2904  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2905  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2906  res_lib_votequorum_status.header.error = error;
2907  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2908 
2909  LEAVE();
2910 }
2911 
2912 static void message_handler_req_lib_votequorum_qdevice_poll (void *conn,
2913  const void *message)
2914 {
2916  struct res_lib_votequorum_status res_lib_votequorum_status;
2917  cs_error_t error = CS_OK;
2918  uint32_t oldflags;
2919 
2920  ENTER();
2921 
2922  if (!qdevice_can_operate) {
2923  error = CS_ERR_ACCESS;
2924  goto out;
2925  }
2926 
2928  if (!(req_lib_votequorum_qdevice_poll->ring_id.nodeid == quorum_ringid.rep.nodeid &&
2929  req_lib_votequorum_qdevice_poll->ring_id.seq == quorum_ringid.seq)) {
2930  log_printf(LOGSYS_LEVEL_DEBUG, "Received poll ring id (%u.%"PRIu64") != last sync "
2931  "ring id (%u.%"PRIu64"). Ignoring poll call.",
2932  req_lib_votequorum_qdevice_poll->ring_id.nodeid, req_lib_votequorum_qdevice_poll->ring_id.seq,
2933  quorum_ringid.rep.nodeid, quorum_ringid.seq);
2934  error = CS_ERR_MESSAGE_ERROR;
2935  goto out;
2936  }
2937  if (strncmp(req_lib_votequorum_qdevice_poll->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
2938  error = CS_ERR_INVALID_PARAM;
2939  goto out;
2940  }
2941 
2942  if (qdevice_timer_set) {
2943  corosync_api->timer_delete(qdevice_timer);
2944  qdevice_timer_set = 0;
2945  }
2946 
2947  oldflags = us->flags;
2948 
2950 
2951  if (req_lib_votequorum_qdevice_poll->cast_vote) {
2953  } else {
2955  }
2956 
2957  if (us->flags != oldflags) {
2958  votequorum_exec_send_nodeinfo(us->node_id);
2959  }
2960 
2961  corosync_api->timer_add_duration((unsigned long long)qdevice_timeout*1000000, qdevice,
2962  qdevice_timer_fn, &qdevice_timer);
2963  qdevice_timer_set = 1;
2964  sync_wait_for_poll_or_timeout = 0;
2965  } else {
2966  error = CS_ERR_NOT_EXIST;
2967  }
2968 
2969 out:
2970  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2971  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2972  res_lib_votequorum_status.header.error = error;
2973  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2974 
2975  LEAVE();
2976 }
2977 
2978 static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn,
2979  const void *message)
2980 {
2982  struct res_lib_votequorum_status res_lib_votequorum_status;
2983  cs_error_t error = CS_OK;
2984  uint32_t oldflags = us->flags;
2985 
2986  ENTER();
2987 
2988  if (!qdevice_can_operate) {
2989  error = CS_ERR_ACCESS;
2990  goto out;
2991  }
2992 
2994  if (strncmp(req_lib_votequorum_qdevice_master_wins->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
2995  error = CS_ERR_INVALID_PARAM;
2996  goto out;
2997  }
2998 
2999  if (req_lib_votequorum_qdevice_master_wins->allow) {
3001  } else {
3003  }
3004 
3005  if (us->flags != oldflags) {
3006  votequorum_exec_send_nodeinfo(us->node_id);
3007  }
3008 
3009  update_qdevice_master_wins(req_lib_votequorum_qdevice_master_wins->allow);
3010  } else {
3011  error = CS_ERR_NOT_EXIST;
3012  }
3013 
3014 out:
3015  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
3016  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
3017  res_lib_votequorum_status.header.error = error;
3018  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
3019 
3020  LEAVE();
3021 }
uint32_t expected_votes
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
void *(* ipc_private_data_get)(void *conn)
Definition: coroapi.h:256
#define VOTEQUORUM_INFO_QUORATE
#define TOTEM_AGREED
Definition: coroapi.h:102
#define CS_TRUE
Definition: corotypes.h:54
const char * name
Definition: coroapi.h:492
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define VOTEQUORUM_READCONFIG_STARTUP
const char * get_run_dir(void)
Definition: util.c:174
void(* timer_delete)(corosync_timer_handle_t timer_handle)
Definition: coroapi.h:241
int(* timer_add_duration)(unsigned long long nanoseconds_in_future, void *data, void(*timer_nf)(void *data), corosync_timer_handle_t *handle)
Definition: coroapi.h:229
const char * icmap_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type)
Return next item in iterator iter.
Definition: icmap.c:1103
#define NODE_FLAGS_WFASTATUS
#define LOGSYS_LEVEL_INFO
Definition: logsys.h:73
uint32_t value
#define CS_FALSE
Definition: corotypes.h:53
struct list_head * next
Definition: list.h:47
#define NODE_FLAGS_QUORATE
#define VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT
The corosync_service_engine struct.
Definition: coroapi.h:491
struct list_head list
void icmap_iter_finalize(icmap_iter_t iter)
Finalize iterator.
Definition: icmap.c:1124
The req_lib_votequorum_qdevice_master_wins struct.
#define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER
#define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE
#define max(a, b)
int(* ipc_response_send)(void *conn, const void *msg, size_t mlen)
Definition: coroapi.h:258
#define list_iterate(v, head)
char * votequorum_init(struct corosync_api_v1 *api, quorum_set_quorate_fn_t q_set_quorate_fn)
nodestate_t
#define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA
int tracking_enabled
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define CS_TRACK_CURRENT
Definition: corotypes.h:87
The req_lib_votequorum_qdevice_unregister struct.
#define NODE_FLAGS_QDEVICE_MASTER_WINS
nodestate_t state
The res_lib_votequorum_quorum_notification struct.
The corosync_lib_handler struct.
Definition: coroapi.h:468
#define VOTEQUORUM_INFO_LAST_MAN_STANDING
struct message_header header
Definition: totemsrp.c:60
#define VOTEQUORUM_INFO_WAIT_FOR_ALL
#define NODE_FLAGS_QDEVICE_CAST_VOTE
uint32_t operation
The res_lib_votequorum_status struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE
The corosync_exec_handler struct.
Definition: coroapi.h:476
#define VOTEQUORUM_INFO_TWONODE
int(* totem_mcast)(const struct iovec *iovec, unsigned int iov_len, unsigned int guarantee)
Definition: coroapi.h:281
char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
Definition: list.h:46
#define VOTEQUORUM_INFO_QDEVICE_REGISTERED
#define log_printf(level, format, args...)
Definition: logsys.h:319
void(* exec_handler_fn)(const void *msg, unsigned int nodeid)
Definition: coroapi.h:477
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define VOTEQUORUM_QDEVICE_NODEID
#define VOTEQUORUM_INFO_QDEVICE_MASTER_WINS
#define VOTEQUORUM_NODESTATE_MEMBER
#define CS_TRACK_CHANGES
Definition: corotypes.h:88
#define SERVICE_ID_MAKE(a, b)
Definition: coroapi.h:459
#define ICMAP_TRACK_DELETE
Definition: icmap.h:77
#define ICMAP_KEYNAME_MAXLEN
Maximum length of key in icmap.
Definition: icmap.h:48
void(* quorum_set_quorate_fn_t)(const unsigned int *view_list, size_t view_list_entries, int quorate, struct memb_ring_id *)
Definition: exec/quorum.h:42
#define VOTEQUORUM_QDEVICE_OPERATION_REGISTER
cs_error_t icmap_get_uint8(const char *key_name, uint8_t *u8)
Definition: icmap.c:842
void(* error_memory_failure)(void) __attribute__((noreturn))
Definition: coroapi.h:423
#define VOTEQUORUM_INFO_ALLOW_DOWNSCALE
#define LOGSYS_LEVEL_WARNING
Definition: logsys.h:71
#define ICMAP_TRACK_MODIFY
Definition: icmap.h:78
#define VOTEQUORUM_INFO_QDEVICE_ALIVE
cs_error_t icmap_set_uint32(const char *key_name, uint32_t value)
Definition: icmap.c:611
void * user_data
Definition: sam.c:127
struct list_head list
unsigned int(* totem_nodeid_get)(void)
Definition: coroapi.h:275
unsigned int nodeid
Definition: coroapi.h:112
#define CS_TRACK_CHANGES_ONLY
Definition: corotypes.h:89
#define ICMAP_TRACK_ADD
Definition: icmap.h:76
The req_lib_votequorum_getinfo struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
Linked list API.
struct totem_ip_address rep
Definition: coroapi.h:123
#define COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
Definition: coroapi.h:157
The req_lib_votequorum_qdevice_update struct.
cs_error_t
The cs_error_t enum.
Definition: corotypes.h:94
unsigned char track_flags
#define LOGSYS_LEVEL_DEBUG
Definition: logsys.h:74
LOGSYS_DECLARE_SUBSYS("VOTEQ")
The req_lib_votequorum_setvotes struct.
The corosync_api_v1 struct.
Definition: coroapi.h:225
cs_error_t icmap_get_uint32(const char *key_name, uint32_t *u32)
Definition: icmap.c:866
uint8_t param
The req_lib_votequorum_setexpected struct.
uint32_t quorate
Definition: sam.c:134
#define swab32(x)
The swab32 macro.
Definition: swab.h:51
#define VOTEQUORUM_INFO_AUTO_TIE_BREAKER
struct corosync_service_engine * votequorum_get_service_engine_ver0(void)
The res_lib_votequorum_expectedvotes_notification struct.
#define ENTER
Definition: logsys.h:320
The req_lib_votequorum_qdevice_register struct.
char * corosync_service_link_and_init(struct corosync_api_v1 *corosync_api, struct default_service *service)
Link and initialize a service.
Definition: service.c:117
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define VOTEQUORUM_NODESTATE_LEAVING
#define PROCESSOR_COUNT_MAX
Definition: coroapi.h:96
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG
The memb_ring_id struct.
Definition: coroapi.h:122
#define VOTEQUORUM_READCONFIG_RUNTIME
struct list_head * prev
Definition: list.h:48
#define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO
The req_lib_votequorum_trackstart struct.
#define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES
#define VOTEQUORUM_QDEVICE_MAX_NAME_LEN
qb_loop_timer_handle corosync_timer_handle_t
corosync_timer_handle_t
Definition: coroapi.h:74
The req_lib_votequorum_qdevice_poll struct.
cs_error_t icmap_get_string(const char *key_name, char **str)
Shortcut for icmap_get for string type.
Definition: icmap.c:896
#define LOGSYS_LEVEL_CRIT
Definition: logsys.h:69
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define NODE_FLAGS_LEAVING
#define list_entry(ptr, type, member)
Definition: list.h:84
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define COROSYNC_LIB_FLOW_CONTROL_REQUIRED
Definition: coroapi.h:156
#define LOGSYS_LEVEL_NOTICE
Definition: logsys.h:72
unsigned long long seq
Definition: coroapi.h:124
cs_error_t icmap_set_uint8(const char *key_name, uint8_t value)
Definition: icmap.c:587
void(* lib_handler_fn)(void *conn, const void *msg)
Definition: coroapi.h:469
The res_lib_votequorum_getinfo struct.
#define VOTEQUORUM_NODESTATE_DEAD
cs_error_t icmap_set_ro_access(const char *key_name, int prefix, int ro_access)
Set read-only access for the given key (key_name), or for the prefix if prefix is set.
Definition: icmap.c:1233
#define VOTEQUORUM_INFO_QDEVICE_CAST_VOTE
int(* ipc_dispatch_send)(void *conn, const void *msg, size_t mlen)
Definition: coroapi.h:263
#define VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT
const char * name
Definition: service.h:43
icmap_iter_t icmap_iter_init(const char *prefix)
Initialize iterator with given prefix.
Definition: icmap.c:1097
struct memb_ring_id ring_id
Definition: totemsrp.c:64
uint64_t tracking_context
#define VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES
#define DEFAULT_LMS_WIN
#define LEAVE
Definition: logsys.h:321
#define NODE_FLAGS_QDEVICE_ALIVE
qb_map_iter_t * icmap_iter_t
Iterator type.
Definition: icmap.h:123
Structure passed as new_value and old_value in change callback.
Definition: icmap.h:91
#define NODE_FLAGS_QDEVICE_REGISTERED
cs_error_t icmap_track_add(const char *key_name, int32_t track_type, icmap_notify_fn_t notify_fn, void *user_data, icmap_track_t *icmap_track)
Add tracking function for given key_name.
Definition: icmap.c:1167
#define NODE_FLAGS_FIRST
struct qb_ipc_request_header header __attribute__((aligned(8)))
#define ICMAP_TRACK_PREFIX
Whole prefix is tracked, instead of key only (so "totem." tracking means that "totem.nodeid", "totem.version", ...
Definition: icmap.h:85