corosync  2.4.2-dirty
totemiba.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2012 Red Hat, Inc.
3  *
4  * All rights reserved.
5  *
6  * Author: Steven Dake (sdake@redhat.com)
7 
8  * This software licensed under BSD license, the text of which follows:
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions are met:
12  *
13  * - Redistributions of source code must retain the above copyright notice,
14  * this list of conditions and the following disclaimer.
15  * - Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * - Neither the name of the MontaVista Software, Inc. nor the names of its
19  * contributors may be used to endorse or promote products derived from this
20  * software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32  * THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include <config.h>
36 
37 #include <assert.h>
38 #include <pthread.h>
39 #include <sys/mman.h>
40 #include <sys/types.h>
41 #include <sys/stat.h>
42 #include <sys/socket.h>
43 #include <netdb.h>
44 #include <sys/un.h>
45 #include <sys/ioctl.h>
46 #include <sys/param.h>
47 #include <netinet/in.h>
48 #include <arpa/inet.h>
49 #include <unistd.h>
50 #include <fcntl.h>
51 #include <stdlib.h>
52 #include <stdio.h>
53 #include <errno.h>
54 #include <sched.h>
55 #include <time.h>
56 #include <sys/time.h>
57 #include <sys/poll.h>
58 #include <limits.h>
59 #include <stdio.h>
60 #include <string.h>
61 #include <stdlib.h>
62 #include <sys/types.h>
63 #include <sys/socket.h>
64 #include <netdb.h>
65 #include <rdma/rdma_cma.h>
66 #include <assert.h>
67 #include <errno.h>
68 
69 #include <corosync/sq.h>
70 #include <corosync/list.h>
71 #include <corosync/hdb.h>
72 #include <corosync/swab.h>
73 
74 #include <qb/qbdefs.h>
75 #include <qb/qbloop.h>
76 #define LOGSYS_UTILS_ONLY 1
77 #include <corosync/logsys.h>
78 #include "totemiba.h"
79 
80 #define COMPLETION_QUEUE_ENTRIES 100
81 
82 #define TOTAL_READ_POSTS 100
83 
84 #define MAX_MTU_SIZE 4096
85 
86 #define MCAST_REJOIN_MSEC 100
87 
89  struct sockaddr bind_addr;
90 
91  struct sockaddr send_token_bind_addr;
92 
93  struct sockaddr mcast_addr;
94 
95  struct sockaddr token_addr;
96 
97  struct sockaddr local_mcast_bind_addr;
98 
100 
102 
104 
106  void *context,
107  const struct totem_ip_address *iface_address);
108 
110  void *context,
111  const void *msg,
112  unsigned int msg_len);
113 
115  void *context);
116 
117  void *rrp_context;
118 
119  qb_loop_timer_handle timer_netif_check_timeout;
120 
122 
124 
125  struct rdma_event_channel *mcast_channel;
126 
127  struct rdma_cm_id *mcast_cma_id;
128 
129  struct ibv_pd *mcast_pd;
130 
131  struct sockaddr mcast_dest_addr;
132 
133  uint32_t mcast_qpn;
134 
135  uint32_t mcast_qkey;
136 
137  struct ibv_ah *mcast_ah;
138 
139  struct ibv_comp_channel *mcast_send_completion_channel;
140 
141  struct ibv_comp_channel *mcast_recv_completion_channel;
142 
143  struct ibv_cq *mcast_send_cq;
144 
145  struct ibv_cq *mcast_recv_cq;
146 
148 
149  struct rdma_event_channel *recv_token_channel;
150 
151  struct rdma_event_channel *listen_recv_token_channel;
152 
153  struct rdma_cm_id *listen_recv_token_cma_id;
154 
155  struct rdma_cm_id *recv_token_cma_id;
156 
157  struct ibv_pd *recv_token_pd;
158 
159  struct sockaddr recv_token_dest_addr;
160 
161  struct ibv_comp_channel *recv_token_send_completion_channel;
162 
163  struct ibv_comp_channel *recv_token_recv_completion_channel;
164 
165  struct ibv_cq *recv_token_send_cq;
166 
167  struct ibv_cq *recv_token_recv_cq;
168 
170 
171  struct rdma_event_channel *send_token_channel;
172 
173  struct rdma_cm_id *send_token_cma_id;
174 
175  struct ibv_pd *send_token_pd;
176 
177  struct sockaddr send_token_dest_addr;
178 
179  uint32_t send_token_qpn;
180 
181  uint32_t send_token_qkey;
182 
183  struct ibv_ah *send_token_ah;
184 
185  struct ibv_comp_channel *send_token_send_completion_channel;
186 
187  struct ibv_comp_channel *send_token_recv_completion_channel;
188 
189  struct ibv_cq *send_token_send_cq;
190 
191  struct ibv_cq *send_token_recv_cq;
192 
194  int level,
195  int subsys,
196  const char *function,
197  const char *file,
198  int line,
199  const char *format,
200  ...)__attribute__((format(printf, 6, 7)));
201 
202 
203  int totemiba_subsys_id;
204 
206 
208 
210 
212 
214 
216 
217  qb_loop_timer_handle mcast_rejoin;
218 };
/*
 * Overlay that lets a pointer and the 64-bit work-request id used by the
 * verbs completion entries share the same storage (see void2wrid and
 * wrid2void).
 */
union u {
	uint64_t wr_id;	/* the storage viewed as a work-request id */
	void *v;	/* the same storage viewed as a pointer */
};
223 
/*
 * Forward a formatted message to the logging callback stored in the
 * instance, tagging it with the instance's subsystem id and the call site.
 *
 * The macro expects a pointer variable named `instance` to be in scope at
 * the call site.
 *
 * The expansion intentionally ends WITHOUT a semicolon: the caller's own
 * `;` completes the do/while statement, so `log_printf (...);` is exactly
 * one statement and can be used as the body of an unbraced if/else.
 */
#define log_printf(level, format, args...)				\
do {									\
	instance->totemiba_log_printf (					\
		level,							\
		instance->totemiba_subsys_id,				\
		__FUNCTION__, __FILE__, __LINE__,			\
		(const char *)format, ##args);				\
} while (0)
232 
233 struct recv_buf {
234  struct list_head list_all;
235  struct ibv_recv_wr recv_wr;
236  struct ibv_sge sge;
237  struct ibv_mr *mr;
238  char buffer[MAX_MTU_SIZE + sizeof (struct ibv_grh)];
239 };
240 
241 struct send_buf {
242  struct list_head list_free;
243  struct list_head list_all;
244  struct ibv_mr *mr;
245  char buffer[MAX_MTU_SIZE];
246 };
247 
248 static hdb_handle_t
249 void2wrid (void *v) { union u u; u.v = v; return u.wr_id; }
250 
251 static void *
252 wrid2void (uint64_t wr_id) { union u u; u.wr_id = wr_id; return u.v; }
253 
254 static void totemiba_instance_initialize (struct totemiba_instance *instance)
255 {
256  memset (instance, 0, sizeof (struct totemiba_instance));
257  list_init (&instance->mcast_send_buf_free);
258  list_init (&instance->token_send_buf_free);
259  list_init (&instance->mcast_send_buf_head);
260  list_init (&instance->token_send_buf_head);
261  list_init (&instance->recv_token_recv_buf_head);
262 }
263 
264 static inline struct send_buf *mcast_send_buf_get (
265  struct totemiba_instance *instance)
266 {
267  struct send_buf *send_buf;
268 
269  if (list_empty (&instance->mcast_send_buf_free) == 0) {
270  send_buf = list_entry (instance->mcast_send_buf_free.next, struct send_buf, list_free);
271  list_del (&send_buf->list_free);
272  return (send_buf);
273  }
274 
275  send_buf = malloc (sizeof (struct send_buf));
276  if (send_buf == NULL) {
277  return (NULL);
278  }
279  send_buf->mr = ibv_reg_mr (instance->mcast_pd,
280  send_buf->buffer,
281  MAX_MTU_SIZE, IBV_ACCESS_LOCAL_WRITE);
282  if (send_buf->mr == NULL) {
283  log_printf (LOGSYS_LEVEL_ERROR, "couldn't register memory range");
284  free (send_buf);
285  return (NULL);
286  }
287  list_init (&send_buf->list_all);
288  list_add_tail (&send_buf->list_all, &instance->mcast_send_buf_head);
289 
290  return (send_buf);
291 }
292 
293 static inline void mcast_send_buf_put (
294  struct totemiba_instance *instance,
295  struct send_buf *send_buf)
296 {
297  list_init (&send_buf->list_free);
298  list_add_tail (&send_buf->list_free, &instance->mcast_send_buf_free);
299 }
300 
301 static inline struct send_buf *token_send_buf_get (
302  struct totemiba_instance *instance)
303 {
304  struct send_buf *send_buf;
305 
306  if (list_empty (&instance->token_send_buf_free) == 0) {
307  send_buf = list_entry (instance->token_send_buf_free.next, struct send_buf, list_free);
308  list_del (&send_buf->list_free);
309  return (send_buf);
310  }
311 
312  send_buf = malloc (sizeof (struct send_buf));
313  if (send_buf == NULL) {
314  return (NULL);
315  }
316  send_buf->mr = ibv_reg_mr (instance->send_token_pd,
317  send_buf->buffer,
318  MAX_MTU_SIZE, IBV_ACCESS_LOCAL_WRITE);
319  if (send_buf->mr == NULL) {
320  log_printf (LOGSYS_LEVEL_ERROR, "couldn't register memory range");
321  free (send_buf);
322  return (NULL);
323  }
324  list_init (&send_buf->list_all);
325  list_add_tail (&send_buf->list_all, &instance->token_send_buf_head);
326 
327  return (send_buf);
328 }
329 
330 static inline void token_send_buf_destroy (struct totemiba_instance *instance)
331 {
332  struct list_head *list;
333  struct send_buf *send_buf;
334 
335  for (list = instance->token_send_buf_head.next; list != &instance->token_send_buf_head;) {
336  send_buf = list_entry (list, struct send_buf, list_all);
337  list = list->next;
338  ibv_dereg_mr (send_buf->mr);
339  free (send_buf);
340  }
341 
342  list_init (&instance->token_send_buf_free);
343  list_init (&instance->token_send_buf_head);
344 }
345 
346 static inline void token_send_buf_put (
347  struct totemiba_instance *instance,
348  struct send_buf *send_buf)
349 {
350  list_init (&send_buf->list_free);
351  list_add_tail (&send_buf->list_free, &instance->token_send_buf_free);
352 }
353 
354 static inline struct recv_buf *recv_token_recv_buf_create (
355  struct totemiba_instance *instance)
356 {
357  struct recv_buf *recv_buf;
358 
359  recv_buf = malloc (sizeof (struct recv_buf));
360  if (recv_buf == NULL) {
361  return (NULL);
362  }
363 
364  recv_buf->mr = ibv_reg_mr (instance->recv_token_pd, &recv_buf->buffer,
365  MAX_MTU_SIZE + sizeof (struct ibv_grh),
366  IBV_ACCESS_LOCAL_WRITE);
367 
368  recv_buf->recv_wr.next = NULL;
369  recv_buf->recv_wr.sg_list = &recv_buf->sge;
370  recv_buf->recv_wr.num_sge = 1;
371  recv_buf->recv_wr.wr_id = (uintptr_t)recv_buf;
372 
373  recv_buf->sge.length = MAX_MTU_SIZE + sizeof (struct ibv_grh);
374  recv_buf->sge.lkey = recv_buf->mr->lkey;
375  recv_buf->sge.addr = (uintptr_t)recv_buf->buffer;
376 
377  list_init (&recv_buf->list_all);
378  list_add (&recv_buf->list_all, &instance->recv_token_recv_buf_head);
379  return (recv_buf);
380 }
381 
382 static inline int recv_token_recv_buf_post (struct totemiba_instance *instance, struct recv_buf *recv_buf)
383 {
384  struct ibv_recv_wr *fail_recv;
385  int res;
386 
387  res = ibv_post_recv (instance->recv_token_cma_id->qp, &recv_buf->recv_wr, &fail_recv);
388 
389  return (res);
390 }
391 
392 static inline void recv_token_recv_buf_post_initial (struct totemiba_instance *instance)
393 {
394  struct recv_buf *recv_buf;
395  unsigned int i;
396 
397  for (i = 0; i < TOTAL_READ_POSTS; i++) {
398  recv_buf = recv_token_recv_buf_create (instance);
399 
400  recv_token_recv_buf_post (instance, recv_buf);
401  }
402 }
403 
404 static inline void recv_token_recv_buf_post_destroy (
405  struct totemiba_instance *instance)
406 {
407  struct recv_buf *recv_buf;
408  struct list_head *list;
409 
410  for (list = instance->recv_token_recv_buf_head.next;
411  list != &instance->recv_token_recv_buf_head;) {
412 
413  recv_buf = list_entry (list, struct recv_buf, list_all);
414  list = list->next;
415  ibv_dereg_mr (recv_buf->mr);
416  free (recv_buf);
417  }
418  list_init (&instance->recv_token_recv_buf_head);
419 }
420 
421 static inline struct recv_buf *mcast_recv_buf_create (struct totemiba_instance *instance)
422 {
423  struct recv_buf *recv_buf;
424  struct ibv_mr *mr;
425 
426  recv_buf = malloc (sizeof (struct recv_buf));
427  if (recv_buf == NULL) {
428  return (NULL);
429  }
430 
431  mr = ibv_reg_mr (instance->mcast_pd, &recv_buf->buffer,
432  MAX_MTU_SIZE + sizeof (struct ibv_grh),
433  IBV_ACCESS_LOCAL_WRITE);
434 
435  recv_buf->recv_wr.next = NULL;
436  recv_buf->recv_wr.sg_list = &recv_buf->sge;
437  recv_buf->recv_wr.num_sge = 1;
438  recv_buf->recv_wr.wr_id = (uintptr_t)recv_buf;
439 
440  recv_buf->sge.length = MAX_MTU_SIZE + sizeof (struct ibv_grh);
441  recv_buf->sge.lkey = mr->lkey;
442  recv_buf->sge.addr = (uintptr_t)recv_buf->buffer;
443 
444  return (recv_buf);
445 }
446 
447 static inline int mcast_recv_buf_post (struct totemiba_instance *instance, struct recv_buf *recv_buf)
448 {
449  struct ibv_recv_wr *fail_recv;
450  int res;
451 
452  res = ibv_post_recv (instance->mcast_cma_id->qp, &recv_buf->recv_wr, &fail_recv);
453 
454  return (res);
455 }
456 
457 static inline void mcast_recv_buf_post_initial (struct totemiba_instance *instance)
458 {
459  struct recv_buf *recv_buf;
460  unsigned int i;
461 
462  for (i = 0; i < TOTAL_READ_POSTS; i++) {
463  recv_buf = mcast_recv_buf_create (instance);
464 
465  mcast_recv_buf_post (instance, recv_buf);
466  }
467 }
468 
469 static inline void iba_deliver_fn (struct totemiba_instance *instance, uint64_t wr_id, uint32_t bytes)
470 {
471  const char *addr;
472  const struct recv_buf *recv_buf;
473 
474  recv_buf = wrid2void(wr_id);
475  addr = &recv_buf->buffer[sizeof (struct ibv_grh)];
476 
477  bytes -= sizeof (struct ibv_grh);
478  instance->totemiba_deliver_fn (instance->rrp_context, addr, bytes);
479 }
480 
481 static int mcast_cq_send_event_fn (int fd, int events, void *context)
482 {
483  struct totemiba_instance *instance = (struct totemiba_instance *)context;
484  struct ibv_wc wc[32];
485  struct ibv_cq *ev_cq;
486  void *ev_ctx;
487  int res;
488  int i;
489 
490  ibv_get_cq_event (instance->mcast_send_completion_channel, &ev_cq, &ev_ctx);
491  ibv_ack_cq_events (ev_cq, 1);
492  res = ibv_req_notify_cq (ev_cq, 0);
493 
494  res = ibv_poll_cq (instance->mcast_send_cq, 32, wc);
495  if (res > 0) {
496  for (i = 0; i < res; i++) {
497  mcast_send_buf_put (instance, wrid2void(wc[i].wr_id));
498  }
499  }
500 
501  return (0);
502 }
503 
504 static int mcast_cq_recv_event_fn (int fd, int events, void *context)
505 {
506  struct totemiba_instance *instance = (struct totemiba_instance *)context;
507  struct ibv_wc wc[64];
508  struct ibv_cq *ev_cq;
509  void *ev_ctx;
510  int res;
511  int i;
512 
513  ibv_get_cq_event (instance->mcast_recv_completion_channel, &ev_cq, &ev_ctx);
514  ibv_ack_cq_events (ev_cq, 1);
515  res = ibv_req_notify_cq (ev_cq, 0);
516 
517  res = ibv_poll_cq (instance->mcast_recv_cq, 64, wc);
518  if (res > 0) {
519  for (i = 0; i < res; i++) {
520  iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
521  mcast_recv_buf_post (instance, wrid2void(wc[i].wr_id));
522  }
523  }
524 
525  return (0);
526 }
527 
/*
 * Timer callback that re-establishes multicast group membership.
 *
 * data is the struct totemiba_instance.  The function leaves the group,
 * destroys the current multicast address handle if there is one, and asks
 * to join the group again.  When the join request fails it re-arms itself
 * through qb_loop_timer_add with a delay of MCAST_REJOIN_MSEC milliseconds.
 *
 * NOTE(review): this listing skips original lines 541 and 544 (the inner
 * numbering jumps 540->542 and 543->545), so the call that consumes the
 * format string below and its final argument are not visible here.
 */
528 static void mcast_rejoin (void *data)
529 {
530  int res;
531  struct totemiba_instance *instance = (struct totemiba_instance *)data;
532 
/* The result of leaving the group is overwritten below without being checked. */
533  res = rdma_leave_multicast (instance->mcast_cma_id, &instance->mcast_addr);
534  if (instance->mcast_ah) {
535  ibv_destroy_ah (instance->mcast_ah);
536  instance->mcast_ah = 0;
537  }
538 
/* The instance is passed as the user context of the join request. */
539  res = rdma_join_multicast (instance->mcast_cma_id, &instance->mcast_addr, instance);
540  if (res != 0) {
542  "rdma_join_multicast failed, errno=%d, rejoining in %u ms",
543  errno,
545  qb_loop_timer_add (instance->totemiba_poll_handle,
546  QB_LOOP_MED,
547  MCAST_REJOIN_MSEC * QB_TIME_NS_IN_MSEC,
548  (void *)instance,
549  mcast_rejoin,
550  &instance->mcast_rejoin);
551  }
552 }
553 
554 static int mcast_rdma_event_fn (int fd, int events, void *context)
555 {
556  struct totemiba_instance *instance = (struct totemiba_instance *)context;
557  struct rdma_cm_event *event;
558 
559  int res;
560 
561  res = rdma_get_cm_event (instance->mcast_channel, &event);
562  if (res != 0) {
563  return (0);
564  }
565 
566  switch (event->event) {
567  /*
568  * occurs when we resolve the multicast address
569  */
570  case RDMA_CM_EVENT_ADDR_RESOLVED:
571  res = rdma_join_multicast (instance->mcast_cma_id, &instance->mcast_addr, instance);
572  usleep(1000);
573  if (res == 0) break;
574  case RDMA_CM_EVENT_MULTICAST_ERROR:
575  log_printf (LOGSYS_LEVEL_ERROR, "multicast error, trying to rejoin in %u ms", MCAST_REJOIN_MSEC);
576  qb_loop_timer_add (instance->totemiba_poll_handle,
577  QB_LOOP_MED,
578  MCAST_REJOIN_MSEC * QB_TIME_NS_IN_MSEC,
579  (void *)instance,
580  mcast_rejoin,
581  &instance->mcast_rejoin);
582  break;
583  /*
584  * occurs when the CM joins the multicast group
585  */
586  case RDMA_CM_EVENT_MULTICAST_JOIN:
587  instance->mcast_qpn = event->param.ud.qp_num;
588  instance->mcast_qkey = event->param.ud.qkey;
589  instance->mcast_ah = ibv_create_ah (instance->mcast_pd, &event->param.ud.ah_attr);
590 
591  if (instance->mcast_seen_joined == 0) {
592  log_printf (LOGSYS_LEVEL_DEBUG, "joining mcast 1st time, running callbacks");
593  instance->totemiba_iface_change_fn (instance->rrp_context, &instance->my_id);
594  instance->mcast_seen_joined=1;
595  }
596  log_printf (LOGSYS_LEVEL_NOTICE, "Joined multicast!");
597  break;
598  case RDMA_CM_EVENT_ADDR_ERROR:
599  case RDMA_CM_EVENT_ROUTE_ERROR:
600  case RDMA_CM_EVENT_DEVICE_REMOVAL:
601  break;
602  default:
603  log_printf (LOGSYS_LEVEL_ERROR, "default %d", event->event);
604  break;
605  }
606 
607  rdma_ack_cm_event (event);
608  return (0);
609 }
610 
611 static int recv_token_cq_send_event_fn (
612  int fd,
613  int revents,
614  void *context)
615 {
616  struct totemiba_instance *instance = (struct totemiba_instance *)context;
617  struct ibv_wc wc[32];
618  struct ibv_cq *ev_cq;
619  void *ev_ctx;
620  int res;
621  int i;
622 
623  ibv_get_cq_event (instance->recv_token_send_completion_channel, &ev_cq, &ev_ctx);
624  ibv_ack_cq_events (ev_cq, 1);
625  res = ibv_req_notify_cq (ev_cq, 0);
626 
627  res = ibv_poll_cq (instance->recv_token_send_cq, 32, wc);
628  if (res > 0) {
629  for (i = 0; i < res; i++) {
630  iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
631  ibv_dereg_mr (wrid2void(wc[i].wr_id));
632  }
633  }
634 
635  return (0);
636 }
637 
638 static int recv_token_cq_recv_event_fn (int fd, int events, void *context)
639 {
640  struct totemiba_instance *instance = (struct totemiba_instance *)context;
641  struct ibv_wc wc[32];
642  struct ibv_cq *ev_cq;
643  void *ev_ctx;
644  int res;
645  int i;
646 
647  ibv_get_cq_event (instance->recv_token_recv_completion_channel, &ev_cq, &ev_ctx);
648  ibv_ack_cq_events (ev_cq, 1);
649  res = ibv_req_notify_cq (ev_cq, 0);
650 
651  res = ibv_poll_cq (instance->recv_token_recv_cq, 32, wc);
652  if (res > 0) {
653  for (i = 0; i < res; i++) {
654  iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
655  recv_token_recv_buf_post (instance, wrid2void(wc[i].wr_id));
656  }
657  }
658 
659  return (0);
660 }
661 
662 static int recv_token_accept_destroy (struct totemiba_instance *instance)
663 {
664  if (instance->recv_token_accepted == 0) {
665  return (0);
666  }
667 
668  qb_loop_poll_del (
669  instance->totemiba_poll_handle,
670  instance->recv_token_recv_completion_channel->fd);
671 
672  qb_loop_poll_del (
673  instance->totemiba_poll_handle,
674  instance->recv_token_send_completion_channel->fd);
675 
676  rdma_destroy_qp (instance->recv_token_cma_id);
677 
678  recv_token_recv_buf_post_destroy (instance);
679 
680  ibv_destroy_cq (instance->recv_token_send_cq);
681 
682  ibv_destroy_cq (instance->recv_token_recv_cq);
683 
684  ibv_destroy_comp_channel (instance->recv_token_send_completion_channel);
685 
686  ibv_destroy_comp_channel (instance->recv_token_recv_completion_channel);
687 
688  ibv_dealloc_pd (instance->recv_token_pd);
689 
690  rdma_destroy_id (instance->recv_token_cma_id);
691 
692  return (0);
693 }
694 
/*
 * Build the verbs resources for the connection id stored in
 * instance->recv_token_cma_id.
 *
 * In order: a protection domain, a completion channel and completion queue
 * for receives, a completion channel and completion queue for sends, and a
 * UD queue pair using both queues.  The initial receive buffers are then
 * posted, two poll callbacks are registered and recv_token_accepted is set.
 *
 * Returns -1 as soon as one of the checked steps fails, otherwise the (zero)
 * result of rdma_create_qp.  Objects created before a failing step are left
 * in place by this function.
 *
 * NOTE(review): this listing skips original lines 719, 744, 776 and 782
 * (the inner numbering jumps), so the last arguments of both ibv_create_cq
 * calls and the descriptor arguments of both qb_loop_poll_add calls are not
 * visible here.
 */
695 static int recv_token_accept_setup (struct totemiba_instance *instance)
696 {
697  struct ibv_qp_init_attr init_qp_attr;
698  int res = 0;
699 
700  /*
701  * Allocate the protection domain
702  */
/* The returned protection domain is not checked for NULL. */
703  instance->recv_token_pd = ibv_alloc_pd (instance->recv_token_cma_id->verbs);
704 
705  /*
706  * Create a completion channel
707  */
708  instance->recv_token_recv_completion_channel = ibv_create_comp_channel (instance->recv_token_cma_id->verbs);
709  if (instance->recv_token_recv_completion_channel == NULL) {
710  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel");
711  return (-1);
712  }
713 
714  /*
715  * Create the completion queue
716  */
717  instance->recv_token_recv_cq = ibv_create_cq (instance->recv_token_cma_id->verbs,
718  COMPLETION_QUEUE_ENTRIES, instance,
720  if (instance->recv_token_recv_cq == NULL) {
721  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue");
722  return (-1);
723  }
724  res = ibv_req_notify_cq (instance->recv_token_recv_cq, 0);
725  if (res != 0) {
726  log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue");
727  return (-1);
728  }
729 
730  /*
731  * Create a completion channel
732  */
733  instance->recv_token_send_completion_channel = ibv_create_comp_channel (instance->recv_token_cma_id->verbs);
734  if (instance->recv_token_send_completion_channel == NULL) {
735  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel");
736  return (-1);
737  }
738 
739  /*
740  * Create the completion queue
741  */
742  instance->recv_token_send_cq = ibv_create_cq (instance->recv_token_cma_id->verbs,
743  COMPLETION_QUEUE_ENTRIES, instance,
745  if (instance->recv_token_send_cq == NULL) {
746  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue");
747  return (-1);
748  }
749  res = ibv_req_notify_cq (instance->recv_token_send_cq, 0);
750  if (res != 0) {
751  log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue");
752  return (-1);
753  }
/* Describe a UD queue pair with one scatter/gather element per request. */
754  memset (&init_qp_attr, 0, sizeof (struct ibv_qp_init_attr));
755  init_qp_attr.cap.max_send_wr = 50;
756  init_qp_attr.cap.max_recv_wr = TOTAL_READ_POSTS;
757  init_qp_attr.cap.max_send_sge = 1;
758  init_qp_attr.cap.max_recv_sge = 1;
759  init_qp_attr.qp_context = instance;
760  init_qp_attr.sq_sig_all = 0;
761  init_qp_attr.qp_type = IBV_QPT_UD;
762  init_qp_attr.send_cq = instance->recv_token_send_cq;
763  init_qp_attr.recv_cq = instance->recv_token_recv_cq;
764  res = rdma_create_qp (instance->recv_token_cma_id, instance->recv_token_pd,
765  &init_qp_attr);
766  if (res != 0) {
767  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create queue pair");
768  return (-1);
769  }
770 
771  recv_token_recv_buf_post_initial (instance);
772 
/* Register the receive-completion callback with the event loop. */
773  qb_loop_poll_add (
774  instance->totemiba_poll_handle,
775  QB_LOOP_MED,
777  POLLIN, instance, recv_token_cq_recv_event_fn);
778 
/* Register the send-completion callback with the event loop. */
779  qb_loop_poll_add (
780  instance->totemiba_poll_handle,
781  QB_LOOP_MED,
783  POLLIN, instance, recv_token_cq_send_event_fn);
784 
/* Tells recv_token_accept_destroy that there is something to tear down. */
785  instance->recv_token_accepted = 1;
786 
787  return (res);
788 };
789 
790 static int recv_token_rdma_event_fn (int fd, int events, void *context)
791 {
792  struct totemiba_instance *instance = (struct totemiba_instance *)context;
793  struct rdma_cm_event *event;
794  struct rdma_conn_param conn_param;
795 
796  int res;
797 
798  res = rdma_get_cm_event (instance->listen_recv_token_channel, &event);
799  if (res != 0) {
800  return (0);
801  }
802 
803  switch (event->event) {
804  case RDMA_CM_EVENT_CONNECT_REQUEST:
805  recv_token_accept_destroy (instance);
806 
807  instance->recv_token_cma_id = event->id;
808  recv_token_accept_setup (instance);
809  memset (&conn_param, 0, sizeof (struct rdma_conn_param));
810  conn_param.qp_num = instance->recv_token_cma_id->qp->qp_num;
811  res = rdma_accept (instance->recv_token_cma_id, &conn_param);
812  break;
813  default:
814  log_printf (LOGSYS_LEVEL_ERROR, "default %d", event->event);
815  break;
816  }
817 
818  res = rdma_ack_cm_event (event);
819  return (0);
820 }
821 
822 static int send_token_cq_send_event_fn (int fd, int events, void *context)
823 {
824  struct totemiba_instance *instance = (struct totemiba_instance *)context;
825  struct ibv_wc wc[32];
826  struct ibv_cq *ev_cq;
827  void *ev_ctx;
828  int res;
829  int i;
830 
831  ibv_get_cq_event (instance->send_token_send_completion_channel, &ev_cq, &ev_ctx);
832  ibv_ack_cq_events (ev_cq, 1);
833  res = ibv_req_notify_cq (ev_cq, 0);
834 
835  res = ibv_poll_cq (instance->send_token_send_cq, 32, wc);
836  if (res > 0) {
837  for (i = 0; i < res; i++) {
838  token_send_buf_put (instance, wrid2void(wc[i].wr_id));
839  }
840  }
841 
842  return (0);
843 }
844 
845 static int send_token_cq_recv_event_fn (int fd, int events, void *context)
846 {
847  struct totemiba_instance *instance = (struct totemiba_instance *)context;
848  struct ibv_wc wc[32];
849  struct ibv_cq *ev_cq;
850  void *ev_ctx;
851  int res;
852  int i;
853 
854  ibv_get_cq_event (instance->send_token_recv_completion_channel, &ev_cq, &ev_ctx);
855  ibv_ack_cq_events (ev_cq, 1);
856  res = ibv_req_notify_cq (ev_cq, 0);
857 
858  res = ibv_poll_cq (instance->send_token_recv_cq, 32, wc);
859  if (res > 0) {
860  for (i = 0; i < res; i++) {
861  iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
862  }
863  }
864 
865  return (0);
866 }
867 
/*
 * Poll callback for the send-token RDMA CM event channel.
 *
 * Fetches one CM event and advances the connection sequence: address
 * resolved -> resolve the route, route resolved -> connect, established ->
 * record queue pair number and qkey, create the address handle and call
 * totemiba_target_set_completed.  The event is acknowledged before
 * returning.  Always returns 0, including when no event could be fetched.
 *
 * NOTE(review): this listing skips original lines 907, 913 and 917 (the
 * inner numbering jumps), so the calls that consume the three message
 * strings in the error cases are not visible here.
 */
868 static int send_token_rdma_event_fn (int fd, int events, void *context)
869 {
870  struct totemiba_instance *instance = (struct totemiba_instance *)context;
871  struct rdma_cm_event *event;
872  struct rdma_conn_param conn_param;
873 
874  int res;
875 
876  res = rdma_get_cm_event (instance->send_token_channel, &event);
877  if (res != 0) {
878  return (0);
879  }
880 
881  switch (event->event) {
882  /*
883  * the destination address has been resolved: start resolving the route
884  */
885  case RDMA_CM_EVENT_ADDR_RESOLVED:
886  res = rdma_resolve_route (instance->send_token_cma_id, 2000);
887  break;
888  /*
889  * the route has been resolved: issue the connect with no private data
890  */
891  case RDMA_CM_EVENT_ROUTE_RESOLVED:
892  memset (&conn_param, 0, sizeof (struct rdma_conn_param));
893  conn_param.private_data = NULL;
894  conn_param.private_data_len = 0;
895  res = rdma_connect (instance->send_token_cma_id, &conn_param);
896  break;
/* Connection is up: keep the addressing data and notify the upper layer. */
897  case RDMA_CM_EVENT_ESTABLISHED:
898  instance->send_token_qpn = event->param.ud.qp_num;
899  instance->send_token_qkey = event->param.ud.qkey;
900  instance->send_token_ah = ibv_create_ah (instance->send_token_pd, &event->param.ud.ah_attr);
901  instance->totemiba_target_set_completed (instance->rrp_context);
902  break;
903 
/* Address, route and multicast errors share one message. */
904  case RDMA_CM_EVENT_ADDR_ERROR:
905  case RDMA_CM_EVENT_ROUTE_ERROR:
906  case RDMA_CM_EVENT_MULTICAST_ERROR:
908  "send_token_rdma_event_fn multicast error");
909  break;
910  case RDMA_CM_EVENT_DEVICE_REMOVAL:
911  break;
912  case RDMA_CM_EVENT_UNREACHABLE:
914  "send_token_rdma_event_fn unreachable");
915  break;
916  default:
918  "send_token_rdma_event_fn unknown event %d",
919  event->event);
920  break;
921  }
922 
923  rdma_ack_cm_event (event);
924  return (0);
925 }
926 
/*
 * Create everything needed to send tokens to instance->token_addr.
 *
 * In order: an RDMA CM event channel and connection id, a bind to
 * send_token_bind_addr, the start of address resolution towards
 * token_addr, a protection domain, a completion channel and completion
 * queue for receives, a completion channel and completion queue for sends,
 * and a UD queue pair using both queues.  Three poll callbacks are then
 * registered (receive completions, send completions, CM events) and
 * send_token_bound is set.
 *
 * Returns 0 on success and -1 as soon as one of the checked steps fails.
 * Objects created before a failing step are left in place by this function.
 *
 * NOTE(review): this listing skips original lines 981, 988, 996 and 1018
 * (the inner numbering jumps), so the last argument of the first
 * ibv_create_cq call, the assignment target of the second
 * ibv_create_comp_channel call and the calls that consume two of the
 * message strings are not visible here.
 */
927 static int send_token_bind (struct totemiba_instance *instance)
928 {
929  int res;
930  struct ibv_qp_init_attr init_qp_attr;
931 
932  instance->send_token_channel = rdma_create_event_channel();
933  if (instance->send_token_channel == NULL) {
934  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create rdma channel");
935  return (-1);
936  }
937 
938  res = rdma_create_id (instance->send_token_channel,
939  &instance->send_token_cma_id, NULL, RDMA_PS_UDP);
940  if (res) {
941  log_printf (LOGSYS_LEVEL_ERROR, "error creating send_token_cma_id");
942  return (-1);
943  }
944 
945  res = rdma_bind_addr (instance->send_token_cma_id,
946  &instance->send_token_bind_addr);
947  if (res) {
948  log_printf (LOGSYS_LEVEL_ERROR, "error doing rdma_bind_addr for send token");
949  return (-1);
950  }
951 
952  /*
953  * Resolve the send_token address into a GUID
954  */
/* bind_addr is given as the source address; the timeout argument is 2000. */
955  res = rdma_resolve_addr (instance->send_token_cma_id,
956  &instance->bind_addr, &instance->token_addr, 2000);
957  if (res) {
958  log_printf (LOGSYS_LEVEL_ERROR, "error resolving send token address %d %d", res, errno);
959  return (-1);
960  }
961 
962  /*
963  * Allocate the protection domain
964  */
/* The returned protection domain is not checked for NULL. */
965  instance->send_token_pd = ibv_alloc_pd (instance->send_token_cma_id->verbs);
966 
967  /*
968  * Create a completion channel
969  */
970  instance->send_token_recv_completion_channel = ibv_create_comp_channel (instance->send_token_cma_id->verbs);
971  if (instance->send_token_recv_completion_channel == NULL) {
972  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel");
973  return (-1);
974  }
975 
976  /*
977  * Create the completion queue
978  */
979  instance->send_token_recv_cq = ibv_create_cq (instance->send_token_cma_id->verbs,
980  COMPLETION_QUEUE_ENTRIES, instance,
982  if (instance->send_token_recv_cq == NULL) {
983  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue");
984  return (-1);
985  }
986  res = ibv_req_notify_cq (instance->send_token_recv_cq, 0);
987  if (res != 0) {
989  "couldn't request notifications of the completion queue");
990  return (-1);
991  }
992 
993  /*
994  * Create a completion channel
995  */
997  ibv_create_comp_channel (instance->send_token_cma_id->verbs);
998 
999  if (instance->send_token_send_completion_channel == NULL) {
1000  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel");
1001  return (-1);
1002  }
1003 
1004  /*
1005  * Create the completion queue
1006  */
1007  instance->send_token_send_cq = ibv_create_cq (
1008  instance->send_token_cma_id->verbs,
1009  COMPLETION_QUEUE_ENTRIES, instance,
1010  instance->send_token_send_completion_channel, 0);
1011  if (instance->send_token_send_cq == NULL) {
1012  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue");
1013  return (-1);
1014  }
1015 
1016  res = ibv_req_notify_cq (instance->send_token_send_cq, 0);
1017  if (res != 0) {
1019  "couldn't request notifications of the completion queue");
1020  return (-1);
1021  }
/* Describe a UD queue pair with one scatter/gather element per request. */
1022  memset (&init_qp_attr, 0, sizeof (struct ibv_qp_init_attr));
1023  init_qp_attr.cap.max_send_wr = 50;
1024  init_qp_attr.cap.max_recv_wr = TOTAL_READ_POSTS;
1025  init_qp_attr.cap.max_send_sge = 1;
1026  init_qp_attr.cap.max_recv_sge = 1;
1027  init_qp_attr.qp_context = instance;
1028  init_qp_attr.sq_sig_all = 0;
1029  init_qp_attr.qp_type = IBV_QPT_UD;
1030  init_qp_attr.send_cq = instance->send_token_send_cq;
1031  init_qp_attr.recv_cq = instance->send_token_recv_cq;
1032  res = rdma_create_qp (instance->send_token_cma_id,
1033  instance->send_token_pd, &init_qp_attr);
1034  if (res != 0) {
1035  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create queue pair");
1036  return (-1);
1037  }
1038 
/* Register the receive-completion callback with the event loop. */
1039  qb_loop_poll_add (
1040  instance->totemiba_poll_handle,
1041  QB_LOOP_MED,
1042  instance->send_token_recv_completion_channel->fd,
1043  POLLIN, instance, send_token_cq_recv_event_fn);
1044 
/* Register the send-completion callback with the event loop. */
1045  qb_loop_poll_add (
1046  instance->totemiba_poll_handle,
1047  QB_LOOP_MED,
1048  instance->send_token_send_completion_channel->fd,
1049  POLLIN, instance, send_token_cq_send_event_fn);
1050 
/* Register the connection-manager event callback with the event loop. */
1051  qb_loop_poll_add (
1052  instance->totemiba_poll_handle,
1053  QB_LOOP_MED,
1054  instance->send_token_channel->fd,
1055  POLLIN, instance, send_token_rdma_event_fn);
1056 
/* Tells send_token_unbind that there is something to tear down. */
1057  instance->send_token_bound = 1;
1058  return (0);
1059 }
1060 
1061 static int send_token_unbind (struct totemiba_instance *instance)
1062 {
1063  if (instance->send_token_bound == 0) {
1064  return (0);
1065  }
1066 
1067  qb_loop_poll_del (
1068  instance->totemiba_poll_handle,
1069  instance->send_token_recv_completion_channel->fd);
1070  qb_loop_poll_del (
1071  instance->totemiba_poll_handle,
1072  instance->send_token_send_completion_channel->fd);
1073  qb_loop_poll_del (
1074  instance->totemiba_poll_handle,
1075  instance->send_token_channel->fd);
1076 
1077  if(instance->send_token_ah)
1078  {
1079  ibv_destroy_ah(instance->send_token_ah);
1080  instance->send_token_ah = 0;
1081  }
1082 
1083  rdma_destroy_qp (instance->send_token_cma_id);
1084  ibv_destroy_cq (instance->send_token_send_cq);
1085  ibv_destroy_cq (instance->send_token_recv_cq);
1086  ibv_destroy_comp_channel (instance->send_token_send_completion_channel);
1087  ibv_destroy_comp_channel (instance->send_token_recv_completion_channel);
1088  token_send_buf_destroy (instance);
1089  ibv_dealloc_pd (instance->send_token_pd);
1090  rdma_destroy_id (instance->send_token_cma_id);
1091  rdma_destroy_event_channel (instance->send_token_channel);
1092  return (0);
1093 }
1094 
1095 static int recv_token_bind (struct totemiba_instance *instance)
1096 {
1097  int res;
1098  struct ibv_port_attr port_attr;
1099 
1100  instance->listen_recv_token_channel = rdma_create_event_channel();
1101  if (instance->listen_recv_token_channel == NULL) {
1102  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create rdma channel");
1103  return (-1);
1104  }
1105 
1106  res = rdma_create_id (instance->listen_recv_token_channel,
1107  &instance->listen_recv_token_cma_id, NULL, RDMA_PS_UDP);
1108  if (res) {
1109  log_printf (LOGSYS_LEVEL_ERROR, "error creating recv_token_cma_id");
1110  return (-1);
1111  }
1112 
1113  res = rdma_bind_addr (instance->listen_recv_token_cma_id,
1114  &instance->bind_addr);
1115  if (res) {
1116  log_printf (LOGSYS_LEVEL_ERROR, "error doing rdma_bind_addr for recv token");
1117  return (-1);
1118  }
1119 
1120  /*
1121  * Determine active_mtu of port and compare it with the configured one (160 is aproximation of all totem
1122  * structures.
1123  *
1124  * TODO: Implement MTU discovery also for IP and handle MTU correctly for all structures inside totemsrp,
1125  * crypto, ...
1126  */
1127  res = ibv_query_port (instance->listen_recv_token_cma_id->verbs, instance->listen_recv_token_cma_id->port_num, &port_attr);
1128  if ( (1 << (port_attr.active_mtu + 7)) < instance->totem_config->net_mtu + 160) {
1129  log_printf (LOGSYS_LEVEL_ERROR, "requested net_mtu is %d and is larger than the active port mtu %d\n",\
1130  instance->totem_config->net_mtu + 160, (1 << (port_attr.active_mtu + 7)));
1131  return (-1);
1132  }
1133 
1134  /*
1135  * Resolve the recv_token address into a GUID
1136  */
1137  res = rdma_listen (instance->listen_recv_token_cma_id, 10);
1138  if (res) {
1139  log_printf (LOGSYS_LEVEL_ERROR, "error listening %d %d", res, errno);
1140  return (-1);
1141  }
1142 
1143  qb_loop_poll_add (
1144  instance->totemiba_poll_handle,
1145  QB_LOOP_MED,
1146  instance->listen_recv_token_channel->fd,
1147  POLLIN, instance, recv_token_rdma_event_fn);
1148 
1149  return (0);
1150 }
1151 
1152 static int mcast_bind (struct totemiba_instance *instance)
1153 {
1154  int res;
1155  struct ibv_qp_init_attr init_qp_attr;
1156 
1157  instance->mcast_channel = rdma_create_event_channel();
1158  if (instance->mcast_channel == NULL) {
1159  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create rdma channel");
1160  return (-1);
1161  }
1162 
1163  res = rdma_create_id (instance->mcast_channel, &instance->mcast_cma_id, NULL, RDMA_PS_UDP);
1164  if (res) {
1165  log_printf (LOGSYS_LEVEL_ERROR, "error creating mcast_cma_id");
1166  return (-1);
1167  }
1168 
1169  res = rdma_bind_addr (instance->mcast_cma_id, &instance->local_mcast_bind_addr);
1170  if (res) {
1171  log_printf (LOGSYS_LEVEL_ERROR, "error doing rdma_bind_addr for mcast");
1172  return (-1);
1173  }
1174 
1175  /*
1176  * Resolve the multicast address into a GUID
1177  */
1178  res = rdma_resolve_addr (instance->mcast_cma_id, &instance->local_mcast_bind_addr,
1179  &instance->mcast_addr, 5000);
1180  if (res) {
1181  log_printf (LOGSYS_LEVEL_ERROR, "error resolving multicast address %d %d", res, errno);
1182  return (-1);
1183  }
1184 
1185  /*
1186  * Allocate the protection domain
1187  */
1188  instance->mcast_pd = ibv_alloc_pd (instance->mcast_cma_id->verbs);
1189 
1190  /*
1191  * Create a completion channel
1192  */
1193  instance->mcast_recv_completion_channel = ibv_create_comp_channel (instance->mcast_cma_id->verbs);
1194  if (instance->mcast_recv_completion_channel == NULL) {
1195  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel");
1196  return (-1);
1197  }
1198 
1199  /*
1200  * Create the completion queue
1201  */
1202  instance->mcast_recv_cq = ibv_create_cq (instance->mcast_cma_id->verbs,
1203  COMPLETION_QUEUE_ENTRIES, instance,
1204  instance->mcast_recv_completion_channel, 0);
1205  if (instance->mcast_recv_cq == NULL) {
1206  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue");
1207  return (-1);
1208  }
1209  res = ibv_req_notify_cq (instance->mcast_recv_cq, 0);
1210  if (res != 0) {
1211  log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue");
1212  return (-1);
1213  }
1214 
1215  /*
1216  * Create a completion channel
1217  */
1218  instance->mcast_send_completion_channel = ibv_create_comp_channel (instance->mcast_cma_id->verbs);
1219  if (instance->mcast_send_completion_channel == NULL) {
1220  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel");
1221  return (-1);
1222  }
1223 
1224  /*
1225  * Create the completion queue
1226  */
1227  instance->mcast_send_cq = ibv_create_cq (instance->mcast_cma_id->verbs,
1228  COMPLETION_QUEUE_ENTRIES, instance,
1229  instance->mcast_send_completion_channel, 0);
1230  if (instance->mcast_send_cq == NULL) {
1231  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue");
1232  return (-1);
1233  }
1234  res = ibv_req_notify_cq (instance->mcast_send_cq, 0);
1235  if (res != 0) {
1236  log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue");
1237  return (-1);
1238  }
1239  memset (&init_qp_attr, 0, sizeof (struct ibv_qp_init_attr));
1240  init_qp_attr.cap.max_send_wr = 50;
1241  init_qp_attr.cap.max_recv_wr = TOTAL_READ_POSTS;
1242  init_qp_attr.cap.max_send_sge = 1;
1243  init_qp_attr.cap.max_recv_sge = 1;
1244  init_qp_attr.qp_context = instance;
1245  init_qp_attr.sq_sig_all = 0;
1246  init_qp_attr.qp_type = IBV_QPT_UD;
1247  init_qp_attr.send_cq = instance->mcast_send_cq;
1248  init_qp_attr.recv_cq = instance->mcast_recv_cq;
1249  res = rdma_create_qp (instance->mcast_cma_id, instance->mcast_pd,
1250  &init_qp_attr);
1251  if (res != 0) {
1252  log_printf (LOGSYS_LEVEL_ERROR, "couldn't create queue pair");
1253  return (-1);
1254  }
1255 
1256  mcast_recv_buf_post_initial (instance);
1257 
1258  qb_loop_poll_add (
1259  instance->totemiba_poll_handle,
1260  QB_LOOP_MED,
1261  instance->mcast_recv_completion_channel->fd,
1262  POLLIN, instance, mcast_cq_recv_event_fn);
1263 
1264  qb_loop_poll_add (
1265  instance->totemiba_poll_handle,
1266  QB_LOOP_MED,
1267  instance->mcast_send_completion_channel->fd,
1268  POLLIN, instance, mcast_cq_send_event_fn);
1269 
1270  qb_loop_poll_add (
1271  instance->totemiba_poll_handle,
1272  QB_LOOP_MED,
1273  instance->mcast_channel->fd,
1274  POLLIN, instance, mcast_rdma_event_fn);
1275 
1276  return (0);
1277 }
1278 
1279 static void timer_function_netif_check_timeout (
1280  void *data)
1281 {
1282  struct totemiba_instance *instance = (struct totemiba_instance *)data;
1283  int res;
1284  int interface_up;
1285  int interface_num;
1286  int addr_len;
1287 
1289  &instance->totem_interface->boundto, &interface_up, &interface_num, instance->totem_config->clear_node_high_bit);
1290 
1292  instance->totem_interface->ip_port, (struct sockaddr_storage *)&instance->bind_addr,
1293  &addr_len);
1294 
1296  0, (struct sockaddr_storage *)&instance->send_token_bind_addr,
1297  &addr_len);
1298 
1300  0, (struct sockaddr_storage *)&instance->local_mcast_bind_addr,
1301  &addr_len);
1302 
1304  instance->totem_interface->ip_port, (struct sockaddr_storage *)&instance->my_id,
1305  &addr_len);
1306 
1308  (const struct sockaddr_storage *)&instance->bind_addr,
1309  &instance->my_id);
1310 
1311  memcpy (&instance->my_id, &instance->totem_interface->boundto,
1312  sizeof (struct totem_ip_address));
1313 
1315  instance->totem_interface->ip_port,
1316  (struct sockaddr_storage *)&instance->mcast_addr, &addr_len);
1317 
1318  res = recv_token_bind (instance);
1319 
1320  res = mcast_bind (instance);
1321 }
1322 
/*
 * Placeholder: none of the arguments is used and nothing is configured.
 *
 * Always returns 0.
 */
int totemiba_crypto_set (
	void *iba_context,
	const char *cipher_type,
	const char *hash_type)
{
	(void)iba_context;
	(void)cipher_type;
	(void)hash_type;

	return (0);
}
1335 
/*
 * Placeholder: the context is not used and nothing is released here.
 *
 * Always returns 0.
 */
int totemiba_finalize (
	void *iba_context)
{
	(void)iba_context;

	return (0);
}
1346 
1347 /*
1348  * Create an instance
1349  */
1351  qb_loop_t *qb_poll_handle,
1352  void **iba_context,
1353  struct totem_config *totem_config,
1354  totemsrp_stats_t *stats,
1355  int interface_no,
1356  void *context,
1357 
1358  void (*deliver_fn) (
1359  void *context,
1360  const void *msg,
1361  unsigned int msg_len),
1362 
1363  void (*iface_change_fn) (
1364  void *context,
1365  const struct totem_ip_address *iface_address),
1366 
1367  void (*target_set_completed) (
1368  void *context))
1369 {
1370  struct totemiba_instance *instance;
1371  int res = 0;
1372 
1373  instance = malloc (sizeof (struct totemiba_instance));
1374  if (instance == NULL) {
1375  return (-1);
1376  }
1377 
1378  totemiba_instance_initialize (instance);
1379 
1380  instance->totem_interface = &totem_config->interfaces[interface_no];
1381 
1382  instance->totemiba_poll_handle = qb_poll_handle;
1383 
1384  instance->totem_interface->bindnet.nodeid = totem_config->node_id;
1385 
1386  instance->totemiba_deliver_fn = deliver_fn;
1387 
1388  instance->totemiba_target_set_completed = target_set_completed;
1389 
1390  instance->totemiba_iface_change_fn = iface_change_fn;
1391 
1392  instance->totem_config = totem_config;
1393  instance->stats = stats;
1394 
1395  instance->rrp_context = context;
1396 
1397  qb_loop_timer_add (instance->totemiba_poll_handle,
1398  QB_LOOP_MED,
1399  100*QB_TIME_NS_IN_MSEC,
1400  (void *)instance,
1401  timer_function_netif_check_timeout,
1402  &instance->timer_netif_check_timeout);
1403 
1404  instance->totemiba_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
1405  instance->totemiba_log_printf = totem_config->totem_logging_configuration.log_printf;
1406 
1407  *iba_context = instance;
1408  return (res);
1409 }
1410 
1412 {
1413  return malloc (MAX_MTU_SIZE);
1414 }
1415 
/*
 * Release a buffer by passing it to free().
 *
 * ptr may be NULL, in which case free() does nothing.
 */
void totemiba_buffer_release (void *ptr)
{
	/*
	 * Plain call: a return statement with an expression is not allowed
	 * in a function returning void.
	 */
	free (ptr);
}
1420 
/*
 * Placeholder: neither the context nor the processor count is used.
 *
 * Always returns 0.
 */
int totemiba_processor_count_set (
	void *iba_context,
	int processor_count)
{
	(void)iba_context;
	(void)processor_count;

	return (0);
}
1432 
/*
 * Placeholder: the context is not used and nothing is flushed.
 *
 * Always returns 0.
 */
int totemiba_recv_flush (void *iba_context)
{
	(void)iba_context;

	return (0);
}
1442 
/*
 * Placeholder: the context is not used and nothing is flushed.
 *
 * Always returns 0.
 */
int totemiba_send_flush (void *iba_context)
{
	(void)iba_context;

	return (0);
}
1452 
1454  void *iba_context,
1455  const void *ms,
1456  unsigned int msg_len)
1457 {
1458  struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
1459  int res = 0;
1460  struct ibv_send_wr send_wr, *failed_send_wr;
1461  struct ibv_sge sge;
1462  void *msg;
1463  struct send_buf *send_buf;
1464 
1465  send_buf = token_send_buf_get (instance);
1466  if (send_buf == NULL) {
1467  return (-1);
1468  }
1469  msg = send_buf->buffer;
1470  memcpy (msg, ms, msg_len);
1471 
1472  send_wr.next = NULL;
1473  send_wr.sg_list = &sge;
1474  send_wr.num_sge = 1;
1475  send_wr.opcode = IBV_WR_SEND;
1476  send_wr.send_flags = IBV_SEND_SIGNALED;
1477  send_wr.wr_id = void2wrid(send_buf);
1478  send_wr.imm_data = 0;
1479  send_wr.wr.ud.ah = instance->send_token_ah;
1480  send_wr.wr.ud.remote_qpn = instance->send_token_qpn;
1481  send_wr.wr.ud.remote_qkey = instance->send_token_qkey;
1482 
1483  sge.length = msg_len;
1484  sge.lkey = send_buf->mr->lkey;
1485  sge.addr = (uintptr_t)msg;
1486 
1487  if(instance->send_token_ah != 0 && instance->send_token_bound)
1488  res = ibv_post_send (instance->send_token_cma_id->qp, &send_wr, &failed_send_wr);
1489 
1490  return (res);
1491 }
1492 
1494  void *iba_context,
1495  const void *ms,
1496  unsigned int msg_len)
1497 {
1498  struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
1499  int res = 0;
1500  struct ibv_send_wr send_wr, *failed_send_wr;
1501  struct ibv_sge sge;
1502  void *msg;
1503  struct send_buf *send_buf;
1504 
1505  send_buf = mcast_send_buf_get (instance);
1506  if (send_buf == NULL) {
1507  return (-1);
1508  }
1509 
1510  msg = send_buf->buffer;
1511  memcpy (msg, ms, msg_len);
1512  send_wr.next = NULL;
1513  send_wr.sg_list = &sge;
1514  send_wr.num_sge = 1;
1515  send_wr.opcode = IBV_WR_SEND;
1516  send_wr.send_flags = IBV_SEND_SIGNALED;
1517  send_wr.wr_id = void2wrid(send_buf);
1518  send_wr.imm_data = 0;
1519  send_wr.wr.ud.ah = instance->mcast_ah;
1520  send_wr.wr.ud.remote_qpn = instance->mcast_qpn;
1521  send_wr.wr.ud.remote_qkey = instance->mcast_qkey;
1522 
1523  sge.length = msg_len;
1524  sge.lkey = send_buf->mr->lkey;
1525  sge.addr = (uintptr_t)msg;
1526 
1527  if (instance->mcast_ah != 0) {
1528  res = ibv_post_send (instance->mcast_cma_id->qp, &send_wr, &failed_send_wr);
1529  }
1530 
1531  return (res);
1532 }
1533 
1535  void *iba_context,
1536  const void *ms,
1537  unsigned int msg_len)
1538 {
1539  struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
1540  int res = 0;
1541  struct ibv_send_wr send_wr, *failed_send_wr;
1542  struct ibv_sge sge;
1543  void *msg;
1544  struct send_buf *send_buf;
1545 
1546  send_buf = mcast_send_buf_get (instance);
1547  if (send_buf == NULL) {
1548  return (-1);
1549  }
1550 
1551  msg = send_buf->buffer;
1552  memcpy (msg, ms, msg_len);
1553  send_wr.next = NULL;
1554  send_wr.sg_list = &sge;
1555  send_wr.num_sge = 1;
1556  send_wr.opcode = IBV_WR_SEND;
1557  send_wr.send_flags = IBV_SEND_SIGNALED;
1558  send_wr.wr_id = void2wrid(send_buf);
1559  send_wr.imm_data = 0;
1560  send_wr.wr.ud.ah = instance->mcast_ah;
1561  send_wr.wr.ud.remote_qpn = instance->mcast_qpn;
1562  send_wr.wr.ud.remote_qkey = instance->mcast_qkey;
1563 
1564  sge.length = msg_len;
1565  sge.lkey = send_buf->mr->lkey;
1566  sge.addr = (uintptr_t)msg;
1567 
1568  if (instance->mcast_ah != 0) {
1569  res = ibv_post_send (instance->mcast_cma_id->qp, &send_wr, &failed_send_wr);
1570  }
1571 
1572  return (res);
1573 }
1574 
/*
 * Placeholder: the context is not used and no interface check is done.
 *
 * Always returns 0.
 */
extern int totemiba_iface_check (void *iba_context)
{
	(void)iba_context;

	return (0);
}
1584 
/*
 * Placeholder: neither argument is used and totem_config is left unchanged.
 */
extern void totemiba_net_mtu_adjust (void *iba_context, struct totem_config *totem_config)
{
	(void)iba_context;
	(void)totem_config;
}
1590 
1591 const char *totemiba_iface_print (void *iba_context) {
1592  struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
1593 
1594  const char *ret_char;
1595 
1596  ret_char = totemip_print (&instance->my_id);
1597 
1598  return (ret_char);
1599 }
1600 
1602  void *iba_context,
1603  struct totem_ip_address *addr)
1604 {
1605  struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
1606  int res = 0;
1607 
1608  memcpy (addr, &instance->my_id, sizeof (struct totem_ip_address));
1609 
1610  return (res);
1611 }
1612 
1614  void *iba_context,
1615  const struct totem_ip_address *token_target)
1616 {
1617  struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
1618  int res = 0;
1619  int addr_len = 16;
1620 
1622  instance->totem_interface->ip_port, (struct sockaddr_storage *)&instance->token_addr,
1623  &addr_len);
1624 
1625  res = send_token_unbind (instance);
1626 
1627  res = send_token_bind (instance);
1628 
1629  return (res);
1630 }
1631 
/*
 * Placeholder: the context is not used and no receive queue is inspected.
 *
 * Always returns 0.
 */
int totemiba_recv_mcast_empty (
	void *iba_context)
{
	(void)iba_context;

	return (0);
}
1642 
unsigned int clear_node_high_bit
Definition: totem.h:117
struct ibv_cq * mcast_recv_cq
Definition: totemiba.c:145
struct list_head mcast_send_buf_head
Definition: totemiba.c:209
struct sockaddr mcast_addr
Definition: totemiba.c:93
void(* totemiba_iface_change_fn)(void *context, const struct totem_ip_address *iface_address)
Definition: totemiba.c:105
Definition: exec/cpg.c:1761
struct ibv_comp_channel * send_token_recv_completion_channel
Definition: totemiba.c:187
struct ibv_cq * recv_token_send_cq
Definition: totemiba.c:165
int recv_token_accepted
Definition: totemiba.c:147
int totemiba_recv_mcast_empty(void *iba_context)
Definition: totemiba.c:1632
struct totem_interface * interfaces
Definition: totem.h:114
struct ibv_comp_channel * send_token_send_completion_channel
Definition: totemiba.c:185
struct ibv_cq * send_token_recv_cq
Definition: totemiba.c:191
struct list_head * next
Definition: list.h:47
The totem_ip_address struct.
Definition: coroapi.h:111
const char * totemip_print(const struct totem_ip_address *addr)
Definition: totemip.c:214
struct sockaddr send_token_bind_addr
Definition: totemiba.c:91
struct sockaddr local_mcast_bind_addr
Definition: totemiba.c:97
int totemiba_send_flush(void *iba_context)
Definition: totemiba.c:1443
struct ibv_mr * mr
Definition: totemiba.c:244
int totemiba_token_target_set(void *iba_context, const struct totem_ip_address *token_target)
Definition: totemiba.c:1613
char buffer[MAX_MTU_SIZE]
Definition: totemiba.c:245
struct sockaddr token_addr
Definition: totemiba.c:95
struct rdma_event_channel * send_token_channel
Definition: totemiba.c:171
uint32_t send_token_qpn
Definition: totemiba.c:179
struct rdma_event_channel * recv_token_channel
Definition: totemiba.c:149
void(* totemiba_deliver_fn)(void *context, const void *msg, unsigned int msg_len)
Definition: totemiba.c:109
struct ibv_comp_channel * mcast_recv_completion_channel
Definition: totemiba.c:141
int totemiba_initialize(qb_loop_t *qb_poll_handle, void **iba_context, struct totem_config *totem_config, totemsrp_stats_t *stats, int interface_no, void *context, void(*deliver_fn)(void *context, const void *msg, unsigned int msg_len), void(*iface_change_fn)(void *context, const struct totem_ip_address *iface_address), void(*target_set_completed)(void *context))
Create an instance.
Definition: totemiba.c:1350
int totemiba_mcast_flush_send(void *iba_context, const void *ms, unsigned int msg_len)
Definition: totemiba.c:1493
unsigned char addr[TOTEMIP_ADDRLEN]
Definition: coroapi.h:77
void * totemiba_buffer_alloc(void)
Definition: totemiba.c:1411
#define MCAST_REJOIN_MSEC
Definition: totemiba.c:86
struct list_head list_all
Definition: totemiba.c:234
uint32_t mcast_qkey
Definition: totemiba.c:135
Definition: list.h:46
int totemiba_iface_check(void *iba_context)
Definition: totemiba.c:1575
void * rrp_context
Definition: totemiba.c:117
struct list_head token_send_buf_free
Definition: totemiba.c:207
struct list_head recv_token_recv_buf_head
Definition: totemiba.c:213
qb_loop_timer_handle timer_netif_check_timeout
Definition: totemiba.c:119
#define TOTAL_READ_POSTS
Definition: totemiba.c:82
struct totem_interface * totem_interface
Definition: totemiba.c:99
unsigned int node_id
Definition: totem.h:116
int totemiba_crypto_set(void *iba_context, const char *cipher_type, const char *hash_type)
Definition: totemiba.c:1323
int totemip_iface_check(struct totem_ip_address *bindnet, struct totem_ip_address *boundto, int *interface_up, int *interface_num, int mask_high_bit)
Definition: totemip.c:405
struct list_head mcast_send_buf_free
Definition: totemiba.c:205
struct ibv_pd * mcast_pd
Definition: totemiba.c:129
void(*) in totemiba_subsys_id)
Definition: totemiba.c:200
qb_loop_timer_handle mcast_rejoin
Definition: totemiba.c:217
int totemiba_token_send(void *iba_context, const void *ms, unsigned int msg_len)
Definition: totemiba.c:1453
unsigned int nodeid
Definition: coroapi.h:112
#define MAX_MTU_SIZE
Definition: totemiba.c:84
struct ibv_comp_channel * recv_token_recv_completion_channel
Definition: totemiba.c:163
struct sockaddr bind_addr
Definition: totemiba.c:89
int totemiba_iface_get(void *iba_context, struct totem_ip_address *addr)
Definition: totemiba.c:1601
struct totem_ip_address mcast_addr
Definition: totem.h:67
struct sockaddr recv_token_dest_addr
Definition: totemiba.c:159
struct ibv_cq * recv_token_recv_cq
Definition: totemiba.c:167
#define LOGSYS_LEVEL_ERROR
Definition: logsys.h:70
Linked list API.
struct ibv_cq * mcast_send_cq
Definition: totemiba.c:143
struct list_head list_all
Definition: totemiba.c:243
struct list_head token_send_buf_head
Definition: totemiba.c:211
#define LOGSYS_LEVEL_DEBUG
Definition: logsys.h:74
void totemiba_net_mtu_adjust(void *iba_context, struct totem_config *totem_config)
Definition: totemiba.c:1585
struct ibv_comp_channel * mcast_send_completion_channel
Definition: totemiba.c:139
struct totem_ip_address boundto
Definition: totem.h:66
typedef __attribute__
struct ibv_mr * mr
Definition: totemiba.c:237
struct ibv_pd * send_token_pd
Definition: totemiba.c:175
void(* log_printf)(int level, int subsys, const char *function_name, const char *file_name, int file_line, const char *format,...) __attribute__((format(printf
Definition: totem.h:75
const char * totemiba_iface_print(void *iba_context)
Definition: totemiba.c:1591
uint16_t ip_port
Definition: totem.h:68
struct ibv_cq * send_token_send_cq
Definition: totemiba.c:189
struct rdma_event_channel * mcast_channel
Definition: totemiba.c:125
struct rdma_cm_id * listen_recv_token_cma_id
Definition: totemiba.c:153
unsigned int net_mtu
Definition: totem.h:165
struct rdma_event_channel * listen_recv_token_channel
Definition: totemiba.c:151
struct ibv_ah * send_token_ah
Definition: totemiba.c:183
int totemiba_finalize(void *iba_context)
Definition: totemiba.c:1336
struct list_head list_free
Definition: totemiba.c:242
struct ibv_ah * mcast_ah
Definition: totemiba.c:137
qb_handle_t hdb_handle_t
Definition: hdb.h:52
qb_loop_t * totemiba_poll_handle
Definition: totemiba.c:121
#define COMPLETION_QUEUE_ENTRIES
Definition: totemiba.c:80
#define log_printf(level, format, args...)
Definition: totemiba.c:224
struct sockaddr mcast_dest_addr
Definition: totemiba.c:131
struct sockaddr send_token_dest_addr
Definition: totemiba.c:177
struct totem_ip_address my_id
Definition: totemiba.c:123
struct ibv_recv_wr recv_wr
Definition: totemiba.c:235
struct ibv_sge sge
Definition: totemiba.c:236
struct rdma_cm_id * mcast_cma_id
Definition: totemiba.c:127
void(* totemiba_target_set_completed)(void *context)
Definition: totemiba.c:114
int totemiba_mcast_noflush_send(void *iba_context, const void *ms, unsigned int msg_len)
Definition: totemiba.c:1534
totemsrp_stats_t * stats
Definition: totemiba.c:103
uint32_t mcast_qpn
Definition: totemiba.c:133
struct totem_config * totem_config
Definition: totemiba.c:101
struct ibv_pd * recv_token_pd
Definition: totemiba.c:157
uint32_t send_token_qkey
Definition: totemiba.c:181
uint64_t wr_id
Definition: totemiba.c:220
struct rdma_cm_id * recv_token_cma_id
Definition: totemiba.c:155
#define list_entry(ptr, type, member)
Definition: list.h:84
struct rdma_cm_id * send_token_cma_id
Definition: totemiba.c:173
int totemip_totemip_to_sockaddr_convert(struct totem_ip_address *ip_addr, uint16_t port, struct sockaddr_storage *saddr, int *addrlen)
Definition: totemip.c:222
struct totem_logging_configuration totem_logging_configuration
Definition: totem.h:163
void(* totemiba_log_printf)(int level, int subsys, const char *function, const char *file, int line, const char *format,...) __attribute__((format(printf
Definition: totemiba.c:193
#define LOGSYS_LEVEL_NOTICE
Definition: logsys.h:72
struct ibv_comp_channel * recv_token_send_completion_channel
Definition: totemiba.c:161
int totemiba_processor_count_set(void *iba_context, int processor_count)
Definition: totemiba.c:1421
int totemiba_recv_flush(void *iba_context)
Definition: totemiba.c:1433
int totemip_sockaddr_to_totemip_convert(const struct sockaddr_storage *saddr, struct totem_ip_address *ip_addr)
Definition: totemip.c:295
char buffer[MAX_MTU_SIZE+sizeof(struct ibv_grh)]
Definition: totemiba.c:238
struct totem_ip_address bindnet
Definition: totem.h:65
void * v
Definition: totemiba.c:221
void totemiba_buffer_release(void *ptr)
Definition: totemiba.c:1416