3
3
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4
4
* University Research and Technology
5
5
* Corporation. All rights reserved.
6
- * Copyright (c) 2004-2016 The University of Tennessee and The University
6
+ * Copyright (c) 2004-2017 The University of Tennessee and The University
7
7
* of Tennessee Research Foundation. All rights
8
8
* reserved.
9
9
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
@@ -66,7 +66,7 @@ OBJ_CLASS_INSTANCE( mca_pml_ob1_recv_frag_t,
66
66
*/
67
67
68
68
/**
69
- * Append a unexpected descriptor to a queue. This function will allocate and
69
+ * Append an unexpected descriptor to a queue. This function will allocate and
70
70
* initialize the fragment (if necessary) and then will add it to the specified
71
71
* queue. The allocated fragment is not returned to the caller.
72
72
*/
@@ -82,21 +82,92 @@ append_frag_to_list(opal_list_t *queue, mca_btl_base_module_t *btl,
82
82
opal_list_append (queue , (opal_list_item_t * )frag );
83
83
}
84
84
85
+ /**
86
+ * Append an unexpected descriptor to an ordered queue. This function will allocate and
87
+ * initialize the fragment (if necessary) and then will add it to the specified
88
+ * queue respecting the sequence number. The allocated fragment is not returned to the caller.
89
+ */
90
+ static void
91
+ append_frag_to_ordered_list (opal_list_t * queue , mca_btl_base_module_t * btl ,
92
+ mca_pml_ob1_match_hdr_t * hdr , mca_btl_base_segment_t * segments ,
93
+ size_t num_segments , mca_pml_ob1_recv_frag_t * frag )
94
+ {
95
+ mca_pml_ob1_recv_frag_t * tmpfrag ;
96
+ mca_pml_ob1_match_hdr_t * tmphdr ;
97
+
98
+ if (NULL == frag ) {
99
+ MCA_PML_OB1_RECV_FRAG_ALLOC (frag );
100
+ MCA_PML_OB1_RECV_FRAG_INIT (frag , hdr , segments , num_segments , btl );
101
+ }
102
+
103
+ if ( opal_list_is_empty (queue ) ) { /* no pending fragments yet */
104
+ opal_list_append (queue , (opal_list_item_t * )frag );
105
+ return ;
106
+ }
107
+ /* Shortcut for sequence number earlier than the first fragment in the list */
108
+ tmpfrag = (mca_pml_ob1_recv_frag_t * )opal_list_get_first (queue );
109
+ tmphdr = & tmpfrag -> hdr .hdr_match ;
110
+ assert (hdr -> hdr_seq != tmphdr -> hdr_seq );
111
+ if ( hdr -> hdr_seq < tmphdr -> hdr_seq ) {
112
+ opal_list_prepend (queue , (opal_list_item_t * )frag );
113
+ return ;
114
+ }
115
+ /* Shortcut for sequence number later than the last fragment in the list */
116
+ tmpfrag = (mca_pml_ob1_recv_frag_t * )opal_list_get_last (queue );
117
+ tmphdr = & tmpfrag -> hdr .hdr_match ;
118
+ if ( hdr -> hdr_seq > tmphdr -> hdr_seq ) {
119
+ opal_list_append (queue , (opal_list_item_t * )frag );
120
+ return ;
121
+ }
122
+ /* For all other cases (sequence number missing in the list) */
123
+ OPAL_LIST_FOREACH (tmpfrag , queue , mca_pml_ob1_recv_frag_t ) {
124
+ tmphdr = & tmpfrag -> hdr .hdr_match ;
125
+ if ( hdr -> hdr_seq < tmphdr -> hdr_seq ) {
126
+ opal_list_insert_pos (queue , (opal_list_item_t * )tmpfrag ,
127
+ (opal_list_item_t * ) frag );
128
+ return ;
129
+ }
130
+ }
131
+ }
132
+
85
133
/**
86
134
* Match incoming recv_frags against posted receives.
87
135
* Supports out of order delivery.
88
136
*
89
- * @param frag_header (IN) Header of received recv_frag.
90
- * @param frag_desc (IN) Received recv_frag descriptor.
91
- * @param match_made (OUT) Flag indicating wether a match was made.
92
- * @param additional_matches (OUT ) List of additional matches
137
+ * @param hdr (IN) Header of received recv_frag.
138
+ * @param segments (IN) Received recv_frag descriptor.
139
+ * @param num_segments (IN) Flag indicating wether a match was made.
140
+ * @param type (IN ) Type of the message header.
93
141
* @return OMPI_SUCCESS or error status on failure.
94
142
*/
95
143
static int mca_pml_ob1_recv_frag_match ( mca_btl_base_module_t * btl ,
96
144
mca_pml_ob1_match_hdr_t * hdr ,
97
145
mca_btl_base_segment_t * segments ,
98
146
size_t num_segments ,
99
- int type );
147
+ int type );
148
+
149
+ /**
150
+ * Match incoming frags against posted receives. If frag is not NULL then we assume
151
+ * it is already local and that it can be released upon completion.
152
+ * Supports out of order delivery.
153
+ *
154
+ * @param comm_ptr (IN) Communicator where the message has been received
155
+ * @param proc (IN) Proc for which we have received the message.
156
+ * @param hdr (IN) Header of received recv_frag.
157
+ * @param segments (IN) Received recv_frag descriptor.
158
+ * @param num_segments (IN) Flag indicating wether a match was made.
159
+ * @param type (IN) Type of the message header.
160
+ * @return OMPI_SUCCESS or error status on failure.
161
+ */
162
+ static int
163
+ mca_pml_ob1_recv_frag_match_proc ( mca_btl_base_module_t * btl ,
164
+ ompi_communicator_t * comm_ptr ,
165
+ mca_pml_ob1_comm_proc_t * proc ,
166
+ mca_pml_ob1_match_hdr_t * hdr ,
167
+ mca_btl_base_segment_t * segments ,
168
+ size_t num_segments ,
169
+ int type ,
170
+ mca_pml_ob1_recv_frag_t * frag );
100
171
101
172
static mca_pml_ob1_recv_request_t *
102
173
match_one (mca_btl_base_module_t * btl ,
@@ -105,6 +176,19 @@ match_one(mca_btl_base_module_t *btl,
105
176
mca_pml_ob1_comm_proc_t * proc ,
106
177
mca_pml_ob1_recv_frag_t * frag );
107
178
179
+ static inline mca_pml_ob1_recv_frag_t * check_cantmatch_for_match (mca_pml_ob1_comm_proc_t * proc )
180
+ {
181
+ mca_pml_ob1_recv_frag_t * frag = NULL ;
182
+
183
+ frag = (mca_pml_ob1_recv_frag_t * )opal_list_get_first (& proc -> frags_cant_match );
184
+ if ( (opal_list_get_end (& proc -> frags_cant_match ) != (opal_list_item_t * )frag ) &&
185
+ (frag -> hdr .hdr_match .hdr_seq == proc -> expected_sequence ) ) {
186
+ opal_list_remove_item (& proc -> frags_cant_match , (opal_list_item_t * )frag );
187
+ return frag ;
188
+ }
189
+ return NULL ;
190
+ }
191
+
108
192
void mca_pml_ob1_recv_frag_callback_match (mca_btl_base_module_t * btl ,
109
193
mca_btl_base_tag_t tag ,
110
194
mca_btl_base_descriptor_t * des ,
@@ -164,15 +248,16 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl,
164
248
OB1_MATCHING_LOCK (& comm -> matching_lock );
165
249
166
250
if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE (comm_ptr )) {
167
- /* get sequence number of next message that can be processed */
168
- if (OPAL_UNLIKELY ((((uint16_t ) hdr -> hdr_seq ) != ((uint16_t ) proc -> expected_sequence )) ||
169
- (opal_list_get_size (& proc -> frags_cant_match ) > 0 ))) {
170
- goto slow_path ;
171
- }
172
-
173
- /* This is the sequence number we were expecting, so we can try
174
- * matching it to already posted receives.
251
+ /* get sequence number of next message that can be processed.
252
+ * If this frag is out of sequence, queue it up in the list
253
+ * now as we still have the lock.
175
254
*/
255
+ if (OPAL_UNLIKELY (((uint16_t ) hdr -> hdr_seq ) != ((uint16_t ) proc -> expected_sequence ))) {
256
+ append_frag_to_ordered_list (& proc -> frags_cant_match , btl ,
257
+ hdr , segments , num_segments , NULL );
258
+ OB1_MATCHING_UNLOCK (& comm -> matching_lock );
259
+ return ;
260
+ }
176
261
177
262
/* We're now expecting the next sequence number. */
178
263
proc -> expected_sequence ++ ;
@@ -183,14 +268,13 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl,
183
268
* generation until we reach the correct sequence number.
184
269
*/
185
270
PERUSE_TRACE_MSG_EVENT (PERUSE_COMM_SEARCH_POSTED_Q_BEGIN , comm_ptr ,
186
- hdr -> hdr_src , hdr -> hdr_tag , PERUSE_RECV );
271
+ hdr -> hdr_src , hdr -> hdr_tag , PERUSE_RECV );
187
272
188
273
match = match_one (btl , hdr , segments , num_segments , comm_ptr , proc , NULL );
189
274
190
275
/* The match is over. We generate the SEARCH_POSTED_Q_END here,
191
- * before going into the mca_pml_ob1_check_cantmatch_for_match so
192
- * we can make a difference for the searching time for all
193
- * messages.
276
+ * before going into check_cantmatch_for_match so we can make
277
+ * a difference for the searching time for all messages.
194
278
*/
195
279
PERUSE_TRACE_MSG_EVENT (PERUSE_COMM_SEARCH_POSTED_Q_END , comm_ptr ,
196
280
hdr -> hdr_src , hdr -> hdr_tag , PERUSE_RECV );
@@ -246,12 +330,31 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl,
246
330
/* don't need a rmb as that is for checking */
247
331
recv_request_pml_complete (match );
248
332
}
249
- return ;
250
333
251
- slow_path :
252
- OB1_MATCHING_UNLOCK (& comm -> matching_lock );
253
- mca_pml_ob1_recv_frag_match (btl , hdr , segments ,
254
- num_segments , MCA_PML_OB1_HDR_TYPE_MATCH );
334
+ /* We matched the frag, Now see if we already have the next sequence in
335
+ * our OOS list. If yes, try to match it.
336
+ *
337
+ * NOTE:
338
+ * To optimize the number of lock used, mca_pml_ob1_recv_frag_match_proc()
339
+ * MUST be called with communicator lock and will RELEASE the lock. This is
340
+ * not ideal but it is better for the performance.
341
+ */
342
+ if (0 != opal_list_get_size (& proc -> frags_cant_match )) {
343
+ mca_pml_ob1_recv_frag_t * frag ;
344
+
345
+ OB1_MATCHING_LOCK (& comm -> matching_lock );
346
+ if ((frag = check_cantmatch_for_match (proc ))) {
347
+ /* mca_pml_ob1_recv_frag_match_proc() will release the lock. */
348
+ mca_pml_ob1_recv_frag_match_proc (frag -> btl , comm_ptr , proc ,
349
+ & frag -> hdr .hdr_match ,
350
+ frag -> segments , frag -> num_segments ,
351
+ hdr -> hdr_common .hdr_type , frag );
352
+ } else {
353
+ OB1_MATCHING_UNLOCK (& comm -> matching_lock );
354
+ }
355
+ }
356
+
357
+ return ;
255
358
}
256
359
257
360
@@ -590,31 +693,6 @@ match_one(mca_btl_base_module_t *btl,
590
693
} while (true);
591
694
}
592
695
593
- static mca_pml_ob1_recv_frag_t * check_cantmatch_for_match (mca_pml_ob1_comm_proc_t * proc )
594
- {
595
- mca_pml_ob1_recv_frag_t * frag ;
596
-
597
- /* search the list for a fragment from the send with sequence
598
- * number next_msg_seq_expected
599
- */
600
- for (frag = (mca_pml_ob1_recv_frag_t * )opal_list_get_first (& proc -> frags_cant_match );
601
- frag != (mca_pml_ob1_recv_frag_t * )opal_list_get_end (& proc -> frags_cant_match );
602
- frag = (mca_pml_ob1_recv_frag_t * )opal_list_get_next (frag ))
603
- {
604
- mca_pml_ob1_match_hdr_t * hdr = & frag -> hdr .hdr_match ;
605
- /*
606
- * If the message has the next expected seq from that proc...
607
- */
608
- if (hdr -> hdr_seq != proc -> expected_sequence )
609
- continue ;
610
-
611
- opal_list_remove_item (& proc -> frags_cant_match , (opal_list_item_t * )frag );
612
- return frag ;
613
- }
614
-
615
- return NULL ;
616
- }
617
-
618
696
/**
619
697
* RCS/CTS receive side matching
620
698
*
@@ -652,12 +730,11 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl,
652
730
int type )
653
731
{
654
732
/* local variables */
655
- uint16_t next_msg_seq_expected , frag_msg_seq ;
733
+ uint16_t frag_msg_seq ;
734
+ uint16_t next_msg_seq_expected ;
656
735
ompi_communicator_t * comm_ptr ;
657
- mca_pml_ob1_recv_request_t * match = NULL ;
658
736
mca_pml_ob1_comm_t * comm ;
659
737
mca_pml_ob1_comm_proc_t * proc ;
660
- mca_pml_ob1_recv_frag_t * frag = NULL ;
661
738
662
739
/* communicator pointer */
663
740
comm_ptr = ompi_comm_lookup (hdr -> hdr_ctx );
@@ -676,14 +753,13 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl,
676
753
comm = (mca_pml_ob1_comm_t * )comm_ptr -> c_pml_comm ;
677
754
678
755
/* source sequence number */
679
- frag_msg_seq = hdr -> hdr_seq ;
680
756
proc = mca_pml_ob1_peer_lookup (comm_ptr , hdr -> hdr_src );
681
757
682
- /**
683
- * We generate the MSG_ARRIVED event as soon as the PML is aware of a matching
684
- * fragment arrival. Independing if it is received on the correct order or not.
685
- * This will allow the tools to figure out if the messages are not received in the
686
- * correct order (if multiple network interfaces).
758
+ /* We generate the MSG_ARRIVED event as soon as the PML is aware
759
+ * of a matching fragment arrival. Independing if it is received
760
+ * on the correct order or not. This will allow the tools to
761
+ * figure out if the messages are not received in the correct
762
+ * order (if multiple network interfaces).
687
763
*/
688
764
PERUSE_TRACE_MSG_EVENT (PERUSE_COMM_MSG_ARRIVED , comm_ptr ,
689
765
hdr -> hdr_src , hdr -> hdr_tag , PERUSE_RECV );
@@ -697,38 +773,67 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl,
697
773
*/
698
774
OB1_MATCHING_LOCK (& comm -> matching_lock );
699
775
700
- /* get sequence number of next message that can be processed */
776
+ frag_msg_seq = hdr -> hdr_seq ;
701
777
next_msg_seq_expected = (uint16_t )proc -> expected_sequence ;
702
- if (OPAL_UNLIKELY (frag_msg_seq != next_msg_seq_expected ))
703
- goto wrong_seq ;
704
778
705
- /*
706
- * This is the sequence number we were expecting,
707
- * so we can try matching it to already posted
708
- * receives.
779
+ /* If the sequence number is wrong, queue it up for later. */
780
+ if (OPAL_UNLIKELY (frag_msg_seq != next_msg_seq_expected )) {
781
+ append_frag_to_ordered_list (& proc -> frags_cant_match , btl , hdr , segments ,
782
+ num_segments , NULL );
783
+ OB1_MATCHING_UNLOCK (& comm -> matching_lock );
784
+ return OMPI_SUCCESS ;
785
+ }
786
+
787
+ /* mca_pml_ob1_recv_frag_match_proc() will release the lock. */
788
+ return mca_pml_ob1_recv_frag_match_proc (btl , comm_ptr , proc , hdr ,
789
+ segments , num_segments ,
790
+ type , NULL );
791
+ }
792
+
793
+
794
+ /* mca_pml_ob1_recv_frag_match_proc() will match the given frag and
795
+ * then try to match the next frag in sequence by looking into arrived
796
+ * out of order frags in frags_cant_match list until it can't find one.
797
+ *
798
+ * ATTENTION: THIS FUNCTION MUST BE CALLED WITH COMMUNICATOR LOCK HELD.
799
+ * THE LOCK WILL BE RELEASED UPON RETURN. USE WITH CARE. */
800
+ static int
801
+ mca_pml_ob1_recv_frag_match_proc ( mca_btl_base_module_t * btl ,
802
+ ompi_communicator_t * comm_ptr ,
803
+ mca_pml_ob1_comm_proc_t * proc ,
804
+ mca_pml_ob1_match_hdr_t * hdr ,
805
+ mca_btl_base_segment_t * segments ,
806
+ size_t num_segments ,
807
+ int type ,
808
+ mca_pml_ob1_recv_frag_t * frag )
809
+ {
810
+ /* local variables */
811
+ mca_pml_ob1_comm_t * comm = (mca_pml_ob1_comm_t * )comm_ptr -> c_pml_comm ;
812
+ mca_pml_ob1_recv_request_t * match = NULL ;
813
+
814
+ /* If we are here, this is the sequence number we were expecting,
815
+ * so we can try matching it to already posted receives.
709
816
*/
710
817
711
- out_of_order_match :
818
+ match_this_frag :
712
819
/* We're now expecting the next sequence number. */
713
820
proc -> expected_sequence ++ ;
714
821
715
- /**
716
- * We generate the SEARCH_POSTED_QUEUE only when the message is received
717
- * in the correct sequence. Otherwise, we delay the event generation until
718
- * we reach the correct sequence number.
822
+ /* We generate the SEARCH_POSTED_QUEUE only when the message is
823
+ * received in the correct sequence. Otherwise, we delay the event
824
+ * generation until we reach the correct sequence number.
719
825
*/
720
826
PERUSE_TRACE_MSG_EVENT (PERUSE_COMM_SEARCH_POSTED_Q_BEGIN , comm_ptr ,
721
- hdr -> hdr_src , hdr -> hdr_tag , PERUSE_RECV );
827
+ hdr -> hdr_src , hdr -> hdr_tag , PERUSE_RECV );
722
828
723
829
match = match_one (btl , hdr , segments , num_segments , comm_ptr , proc , frag );
724
830
725
- /**
726
- * The match is over. We generate the SEARCH_POSTED_Q_END here, before going
727
- * into the mca_pml_ob1_check_cantmatch_for_match so we can make a difference
728
- * for the searching time for all messages.
831
+ /* The match is over. We generate the SEARCH_POSTED_Q_END here,
832
+ * before going into check_cantmatch_for_match we can make a
833
+ * difference for the searching time for all messages.
729
834
*/
730
835
PERUSE_TRACE_MSG_EVENT (PERUSE_COMM_SEARCH_POSTED_Q_END , comm_ptr ,
731
- hdr -> hdr_src , hdr -> hdr_tag , PERUSE_RECV );
836
+ hdr -> hdr_src , hdr -> hdr_tag , PERUSE_RECV );
732
837
733
838
/* release matching lock before processing fragment */
734
839
OB1_MATCHING_UNLOCK (& comm -> matching_lock );
@@ -752,7 +857,7 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl,
752
857
753
858
/*
754
859
* Now that new message has arrived, check to see if
755
- * any fragments on the c_c_frags_cant_match list
860
+ * any fragments on the frags_cant_match list
756
861
* may now be used to form new matchs
757
862
*/
758
863
if (OPAL_UNLIKELY (opal_list_get_size (& proc -> frags_cant_match ) > 0 )) {
@@ -763,20 +868,11 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl,
763
868
num_segments = frag -> num_segments ;
764
869
btl = frag -> btl ;
765
870
type = hdr -> hdr_common .hdr_type ;
766
- goto out_of_order_match ;
871
+ goto match_this_frag ;
767
872
}
768
873
OB1_MATCHING_UNLOCK (& comm -> matching_lock );
769
874
}
770
875
771
- return OMPI_SUCCESS ;
772
- wrong_seq :
773
- /*
774
- * This message comes after the next expected, so it
775
- * is ahead of sequence. Save it for later.
776
- */
777
- append_frag_to_list (& proc -> frags_cant_match , btl , hdr , segments ,
778
- num_segments , NULL );
779
- OB1_MATCHING_UNLOCK (& comm -> matching_lock );
780
876
return OMPI_SUCCESS ;
781
877
}
782
878
0 commit comments