genomicSimulationC 0.2.6
sim-operations.c
Go to the documentation of this file.
1#ifndef SIM_OPERATIONS
2#define SIM_OPERATIONS
3#include "sim-operations.h"
4/* genomicSimulationC v0.2.6 - last edit 17 Jan 2025 */
5
12 .offspring_name_prefix = NULL,
13 .family_size = 1,
14 .will_track_pedigree = GSC_FALSE,
15 .will_allocate_ids = GSC_TRUE,
16 .filename_prefix = NULL,
17 .will_save_pedigree_to_file = GSC_FALSE,
18 .will_save_bvs_to_file = GSC_NO_EFFECTSET,
19 .will_save_alleles_to_file = GSC_FALSE,
20 //.will_save_recombinations_to_file = GSC_FALSE,
21 .will_save_to_simdata = GSC_TRUE
22};
23
/** Allocate memory through GSC_MALLOC, reporting failures on stderr.
 *
 * @param size number of bytes requested. A request for 0 bytes is reported
 * on stderr and answered with NULL without calling the allocator.
 * @param exitonfail if nonzero, a failed allocation prints a message and
 * terminates the program with exit status 2. If zero, a failed allocation
 * prints a message and NULL is returned to the caller.
 * @return the pointer produced by GSC_MALLOC, or NULL as described above.
 */
static void* gsc_malloc_wrap(const size_t size, char exitonfail) {
    if (size == 0) {
        fprintf(stderr, "0 memory allocation requested.\n");
        return NULL;
    }

    void* block = GSC_MALLOC(size);

    // Fatal variant first: the caller asked us not to return on failure.
    if (block == NULL && exitonfail) {
        fprintf(stderr, "Memory allocation failed. Exiting.\n"); exit(2);
    }
    // Non-fatal variant: report, then hand the NULL back to the caller.
    if (block == NULL) {
        fprintf(stderr, "Memory allocation failed.\n");
    }
    return block;
}
49
63 const GSC_ID_T n_labels,
64 const int* labelDefaults,
65 const GSC_LOCALX_T n_genotypes) {
67
68 m->n_genotypes = n_genotypes;
69 m->n_markers = n_markers;
70 m->n_labels = n_labels;
71 //m->alleles = gsc_malloc_wrap(sizeof(char*) * CONTIG_WIDTH);
72 for (GSC_LOCALX_T i = 0; i < n_genotypes; ++i) {
73 m->alleles[i] = gsc_malloc_wrap(sizeof(char) * (n_markers<<1),GSC_TRUE);
74 memset(m->alleles[i], 0, sizeof(char) * (n_markers<<1));
75 //m->ids[i] = 0;
76 }
77 memset(m->alleles + n_genotypes, 0, sizeof(char*) * (CONTIG_WIDTH - n_genotypes)); // setting the pointers to NULL
78
79 if (n_labels > 0) {
80 m->labels = gsc_malloc_wrap(sizeof(int*) * n_labels,GSC_TRUE);
81 for (GSC_ID_T i = 0; i < n_labels; ++i) {
82 m->labels[i] = gsc_malloc_wrap(sizeof(int) * CONTIG_WIDTH,GSC_TRUE);
83 for (GSC_LOCALX_T j = 0; j < CONTIG_WIDTH; ++j) {
84 m->labels[i][j] = labelDefaults[i];
85 }
86 }
87 } else if (n_labels == 0) {
88 m->labels = NULL;
89 } else {
90 fprintf(stderr, "Invalid negative number of labels provided to gsc_create_empty_allelematrix");
91 m->labels = NULL;
92 }
93
94 memset(m->ids, 0, sizeof(gsc_PedigreeID) * CONTIG_WIDTH);
95 memset(m->pedigrees[0], 0, sizeof(gsc_PedigreeID) * CONTIG_WIDTH);
96 memset(m->pedigrees[1], 0, sizeof(gsc_PedigreeID) * CONTIG_WIDTH);
97 memset(m->groups, 0, sizeof(gsc_GroupNum) * CONTIG_WIDTH);
98 memset(m->names, 0, sizeof(char*) * CONTIG_WIDTH); // setting the pointers to NULL
99
100 m->next = NULL;
101
102 return m;
103}
104
114 d->n_labels = 0;
115 d->label_ids = NULL;
116 d->label_defaults = NULL;
117 d->genome.n_markers = 0;
118 d->genome.marker_names = NULL;
119 d->genome.names_alphabetical = NULL;
120 d->genome.n_maps = 0;
121 d->genome.map_ids = NULL;
122 d->genome.maps = NULL;
123 d->m = NULL;
124 d->n_eff_sets = 0;
125 d->e = NULL;
126 rnd_pcg_seed( &d->rng, RNGseed );
128 d->n_groups = 0;
129 return d;
130}
131
143 // Free label defaults
144 if (d->n_labels > 0) {
145 if (d->label_ids != NULL) {
147 }
148 if (d->label_defaults != NULL) {
150 }
151 }
152
153 // Free other details
155 for (GSC_ID_T i = 0; i < d->n_eff_sets; ++i) {
156 gsc_delete_effect_matrix(&(d->e[i]));
157 }
158 if (d->n_eff_sets > 0) {
160 GSC_FREE(d->e);
161 }
163
164 // Clear all values
165 d->n_labels = 0;
166 d->label_ids = NULL;
167 d->label_defaults = NULL;
168 d->genome.n_markers = 0;
169 d->genome.marker_names = NULL;
170 d->genome.n_maps = 0;
171 d->genome.map_ids = NULL;
172 d->genome.maps = NULL;
173 d->m = NULL;
174 d->n_eff_sets = 0;
175 d->e = NULL;
177 d->n_groups = 0;
178}
179
180/*-------------------------Random generators---------------------------------*/
181
182/* https://www.everything2.com/title/Generating+random+numbers+with+a+Poisson+distribution
183https://en.wikipedia.org/wiki/Poisson_distribution#Generating_Poisson-distributed_random_variables
184*/
195int gsc_randpoi(rnd_pcg_t* rng, double lambda) {
196 if (lambda <= 0) { // invalid parameter.
197 //In this case we use the function to generate number of crossovers
198 // so if parameter/length passed in is invalid, we just want no crossovers
199 return 0;
200 }
201
202 int k = 0;
203 double target = exp(-lambda);
204 double p = rnd_pcg_nextf(rng);
205 while (p > target) {
206 k += 1;
207 p *= rnd_pcg_nextf(rng);
208 }
209 return k;
210}
211
212/*end random generators*/
213
214/*------------------------Supporter Functions--------------------------------*/
215
234struct gsc_TableSize gsc_get_file_dimensions(const char* filename, const char sep) {
235 struct gsc_TableSize details;
236 details.num_columns = 0;
237 details.num_rows = 0;
238
239 FILE* fp;
240 int c; // this is used to store the output of fgetc i.e. the next character in the file
241 if ((fp = fopen(filename, "r")) == NULL) {
242 fprintf(stderr, "Failed to open file %s.\n", filename); exit(1);
243 }
244 c = fgetc(fp);
245
246 while (c != EOF && c != '\n') {
247 //RPACKINSERT R_CheckUserInterrupt();
248 if (c == sep) {
249 details.num_columns += 1; // add count for columns of form [colname]sep
250 }
251 c = fgetc(fp);
252 }
253
254 details.num_columns += 1; // add another column that was bounded by sep[colname][EOF or \n]
255 details.num_rows = 1; // we successfully got the first row
256
257 // now get all the rows. What we care about in the rows is the number of them
258 c = fgetc(fp);
259 int sep_count = 0; // for each row, count the columns to make sure they match and the file is valid
260 int has_length = GSC_FALSE;
261 while (c != EOF) {
262 //RPACKINSERT R_CheckUserInterrupt();
263 if (c == '\n') {
264 details.num_rows += 1; // add count for columns of form [colname]sep
265
266 // check we have right number of columns and reset counter
267 if (has_length && sep_count != details.num_columns-1) {
268 // we have a bad number of columns
269 details.num_columns = 0;
270 fclose(fp);
271 fprintf(stderr, "Bad columns on row %d\n", details.num_rows + 1); exit(1);
272 }
273 sep_count = 0;
274 has_length = GSC_FALSE;
275
276 } else if (c == sep) {
277 sep_count += 1;
278 } else if (has_length == GSC_FALSE) {
279 has_length = GSC_TRUE;
280 }
281 c = fgetc(fp);
282 }
283 if (has_length) {
284 details.num_rows += 1; // for the last row before EOF
285 }
286
287 fclose(fp);
288 return details;
289}
290
312/*int gsc_get_from_ordered_uint_list(const unsigned int target,
313 const unsigned int listLen,
314 const unsigned int* list) {
315 unsigned int first = 0, last = listLen - 1;
316 int index = (first + last) / 2;
317 while (list[index] != target && first <= last) {
318 if (list[index] == 0) {
319 int lookahead = 1;
320 while(1) {
321 if (index+lookahead <= last && list[index+lookahead] != 0) {
322 if (list[index+lookahead] == target) {
323 return index+lookahead;
324 } else if (list[index+lookahead] < target) {
325 first = index+lookahead + 1;
326 break;
327 } else {
328 last = index - 1;
329 break;
330 }
331 } else if (index-lookahead <= last && list[index-lookahead] != 0) {
332 if (list[index-lookahead] == target) {
333 return index-lookahead;
334 } else if (list[index-lookahead] < target) {
335 first = index + 1;
336 break;
337 } else {
338 last = index-lookahead - 1;
339 break;
340 }
341 }
342 ++lookahead;
343 if (index+lookahead <= last || index-lookahead >= first) {
344 // failed to find any nonzeros between first and last
345 return -1;
346 }
347 }
348
349 } else { // No need to dodge 0. Normal binary search.
350 if (list[index] == target) {
351 return index;
352 } else if (list[index] < target) {
353 first = index + 1;
354 } else {
355 last = index - 1;
356 }
357
358 }
359 // index has been updated, no matter the branch.
360 index = (first + last) / 2;
361 }
362
363 if (first > last) {
364 return -1;
365 }
366 return index;
367}*/
368
391 const GSC_LOCALX_T listLen,
392 const gsc_PedigreeID* list) {
393 GSC_LOCALX_T first = 0, last = listLen - 1;
394 GSC_LOCALX_T index = (first + last) / 2;
395 while (list[index].id != target.id && first <= last) {
396 if (list[index].id == GSC_NO_PEDIGREE.id) {
397 int lookahead = 1;
398 while(1) {
399 if (index+lookahead <= last && list[index+lookahead].id != GSC_NO_PEDIGREE.id) {
400 if (list[index+lookahead].id == target.id) {
401 return index+lookahead;
402 } else if (list[index+lookahead].id < target.id) {
403 first = index+lookahead + 1;
404 break;
405 } else {
406 last = index - 1;
407 break;
408 }
409 } else if (index-lookahead <= last && list[index-lookahead].id != GSC_NO_PEDIGREE.id) {
410 if (list[index-lookahead].id == target.id) {
411 return index-lookahead;
412 } else if (list[index-lookahead].id < target.id) {
413 first = index + 1;
414 break;
415 } else {
416 last = index-lookahead - 1;
417 break;
418 }
419 }
420 ++lookahead;
421 if (index+lookahead <= last || index-lookahead >= first) {
422 // failed to find any nonzeros between first and last
423 return GSC_NA_LOCALX;
424 }
425 }
426
427 } else { // No need to dodge 0. Normal binary search.
428 if (list[index].id == target.id) {
429 return index;
430 } else if (list[index].id < target.id) {
431 first = index + 1;
432 } else {
433 last = index - 1;
434 }
435
436 }
437 // index has been updated, no matter the branch.
438 index = (first + last) / 2;
439 }
440
441 if (first > last) {
442 return GSC_NA_LOCALX;
443 }
444 return index;
445}
446
/** Linear search for a string in an array of strings.
 *
 * @param target the string to look for (compared with strcmp).
 * @param listLen number of entries in `list`.
 * @param list the array of strings to scan, front to back.
 * @return the index of the first entry equal to `target`, or SIZE_MAX if no
 * entry matches.
 */
size_t gsc_get_from_unordered_str_list(const char* target,
                                       const size_t listLen,
                                       const char** list) {
    size_t pos = 0;
    while (pos < listLen) {
        if (strcmp(list[pos], target) == 0) {
            return pos;
        }
        ++pos;
    }
    // Scanned every entry without a match.
    return SIZE_MAX;
}
473
/** Binary search for a string in an array of strings sorted in ascending
 * strcmp order.
 *
 * The search works on the half-open interval [lo, hi), so it never has to
 * compute `index - 1` on an unsigned index and is safe for an empty list.
 *
 * @param target the string to look for (compared with strcmp).
 * @param listLen number of entries in `list`. May be 0.
 * @param list the array of strings to search. Must be sorted ascending
 * according to strcmp, otherwise matches may be missed.
 * @return the index of an entry equal to `target`, or SIZE_MAX if there is
 * none. If several entries are equal to `target`, any one of them may be
 * returned.
 */
size_t gsc_get_from_ordered_str_list(const char* target,
                                     const size_t listLen,
                                     const char** list) {
    size_t lo = 0;
    size_t hi = listLen; // one past the last candidate

    while (lo < hi) {
        const size_t mid = lo + (hi - lo) / 2;
        const int comparison = strcmp(target, list[mid]);

        if (comparison == 0) {
            return mid;
        } else if (comparison < 0) {
            // target sorts before list[mid]: keep searching the lower half.
            hi = mid;
        } else {
            // target sorts after list[mid]: keep searching the upper half.
            lo = mid + 1;
        }
    }

    return SIZE_MAX; // did not find a match.
}
515
516
535void gsc_shuffle_up_to(rnd_pcg_t* rng,
536 void* sequence,
537 const size_t item_size,
538 const size_t total_n,
539 const size_t n_to_shuffle) {
540 if (n_to_shuffle > 1) {
541
542 size_t tmp_spot;
543 void* tmp = &tmp_spot;
544 if (item_size > sizeof(tmp_spot)) {
545 tmp = gsc_malloc_wrap(item_size, GSC_TRUE);
546 }
547
548 size_t maxi = total_n > n_to_shuffle ? n_to_shuffle - 1 : total_n - 1;
549 size_t i;
550 for (i = 0; i <= maxi; ++i) {
551 // items before i are already shuffled
552 size_t j = i + rnd_pcg_range(rng,0,total_n - i - 1);
553
554 // add the next chosen value to the end of the shuffle
555 memcpy(&tmp, sequence + j*item_size, item_size);
556 memcpy(sequence + j*item_size, sequence + i*item_size, item_size);
557 memcpy(sequence + i*item_size, &tmp, item_size);
558 }
559
560 if (item_size > sizeof(tmp_spot)) {
561 free(tmp);
562 }
563 }
564}
565
581 const char* prefix,
582 const int suffix,
583 const GSC_LOCALX_T from_index) {
584 char sname[NAME_LENGTH];
585 char format[NAME_LENGTH];
586 if (prefix == NULL) {
587 // make it an empty string instead, so it is not displayed as (null)
588 prefix = "";
589 }
590 // use sname to save the number of digits to pad by:
591 sprintf(sname, "%%0%dd", gsc_get_integer_digits(a->n_genotypes - from_index)); // Creates: %0[n]d
592 sprintf(format, "%s%s", prefix, sname);
593
594 int livingsuffix = suffix;
595 ++livingsuffix;
596 for (GSC_LOCALX_T i = from_index; i < a->n_genotypes; ++i) {
597 // clear name if it's pre-existing
598 if (a->names[i] != NULL) {
599 GSC_FREE(a->names[i]);
600 }
601
602 // save new name
603 sprintf(sname, format, livingsuffix);
604 a->names[i] = gsc_malloc_wrap(sizeof(char) * (strlen(sname) + 1),GSC_TRUE);
605 strcpy(a->names[i], sname);
606
607 ++livingsuffix;
608 }
609}
610
623 // Add new label default
624 if (d->n_labels == 0) {
626 d->label_ids[0] = (gsc_LabelID){.id=1};
627
628 d->label_defaults = gsc_malloc_wrap(sizeof(int) * 1,GSC_TRUE);
629 d->label_defaults[0] = setTo;
630
631 } else if (d->n_labels > 0) {
632
633 gsc_LabelID* new_label_ids;
634 if (d->label_ids != NULL) {
635 new_label_ids = gsc_malloc_wrap(sizeof(gsc_LabelID) * (d->n_labels + 1),GSC_TRUE);
636 memcpy(new_label_ids,d->label_ids,sizeof(gsc_LabelID)*d->n_labels);
637 new_label_ids[d->n_labels] = gsc_get_new_label_id(d);
639
640 } else { // d->label_ids == NULL
641 // If the other labels do not have identifiers, they're corrupted and
642 // deserve to be destroyed.
643 new_label_ids = gsc_malloc_wrap(sizeof(gsc_LabelID) * 1,GSC_TRUE);
644 d->n_labels = 0;
645 new_label_ids[d->n_labels] = gsc_get_new_label_id(d);
646 }
647 d->label_ids = new_label_ids;
648
649 int* new_label_defaults = gsc_malloc_wrap(sizeof(int) * (d->n_labels + 1),GSC_TRUE);
650 if (d->label_defaults != NULL) {
651 for (GSC_ID_T i = 0; i < d->n_labels; ++i) {
652 new_label_defaults[i] = d->label_defaults[i];
653 }
655 } else if (d->n_labels > 0) {
656 memset(new_label_defaults, 0, sizeof(int) * d->n_labels);
657 }
658 new_label_defaults[d->n_labels] = setTo;
659 d->label_defaults = new_label_defaults;
660
661 } else {
662 fprintf(stderr, "Labels malformed; gsc_SimData may be corrupted\n");
663 return (gsc_LabelID){.id=GSC_NA_ID};
664 }
665 d->n_labels += 1;
666
667 // Set all values of that label to the default
668 gsc_AlleleMatrix* m = d->m;
669 int warned = GSC_FALSE;
670 do {
671 // Do we need to destroy the extant label table? happens if label_ids were missing and we discarded them
672 if (m->n_labels != d->n_labels - 1 && m->labels != NULL) {
673 for (GSC_ID_T i = 0; i < m->n_labels; ++i) {
674 GSC_FREE(m->labels[i]);
675 }
676 GSC_FREE(m->labels);
677 m->labels = NULL;
678 }
679
680 m->n_labels = d->n_labels;
681
682 // Consider the case when we need to expand the label list
683 if (m->n_labels > 1 && m->labels != NULL) {
684 GSC_ID_T newLabel = m->n_labels - 1;
685
686 // Create label list
687 int** oldLabelList = m->labels;
688 m->labels = gsc_malloc_wrap(sizeof(int*) * m->n_labels,GSC_TRUE);
689 for (GSC_ID_T i = 0; i < m->n_labels - 1; ++i) {
690 m->labels[i] = oldLabelList[i];
691 }
692 m->labels[newLabel] = gsc_malloc_wrap(sizeof(int) * CONTIG_WIDTH,GSC_TRUE);
693 GSC_FREE(oldLabelList);
694
695 // Set labels
696 if (setTo == 0) {
697 memset(m->labels[newLabel], 0, sizeof(int) * CONTIG_WIDTH);
698 } else {
699 for (GSC_LOCALX_T i = 0; i < CONTIG_WIDTH; ++i) {
700 m->labels[newLabel][i] = setTo;
701 }
702 }
703
704 // Consider the case we need to initialise the label list
705 } else if (m->n_labels == 1 && m->labels == NULL) {
706 // Create the label list
707 m->labels = gsc_malloc_wrap(sizeof(int*) * 1,GSC_TRUE);
708 m->labels[0] = gsc_malloc_wrap(sizeof(int) * CONTIG_WIDTH,GSC_TRUE);
709
710 // Set labels
711 if (setTo == 0) {
712 memset(m->labels[0], 0, sizeof(int) * CONTIG_WIDTH);
713 } else {
714 for (GSC_LOCALX_T i = 0; i < CONTIG_WIDTH; ++i) {
715 m->labels[0][i] = setTo;
716 }
717 }
718
719 } else if (!warned) {
720 fprintf(stderr, "Unable to create new label for all genotypes; gsc_SimData may be corrupted\n");
721 warned = GSC_TRUE;
722 }
723
724 } while ((m = m->next) != NULL);
725 return d->label_ids[d->n_labels - 1];
726}
727
740 const gsc_LabelID whichLabel,
741 const int newDefault) {
742 GSC_ID_T labelIndex;
743 if (whichLabel.id == GSC_NO_LABEL.id || (labelIndex = gsc_get_index_of_label(d, whichLabel)) == GSC_NA_IDX) {
744 fprintf(stderr, "Nonexistent label %lu\n", (long unsigned int) whichLabel.id);
745 return;
746 }
747 d->label_defaults[labelIndex] = newDefault;
748}
749
766 const gsc_GroupNum whichGroup,
767 const gsc_LabelID whichLabel,
768 const int setTo) {
769 GSC_ID_T labelIndex;
770 if (whichLabel.id == GSC_NO_LABEL.id || (labelIndex = gsc_get_index_of_label(d, whichLabel)) == GSC_NA_IDX) {
771 fprintf(stderr, "Nonexistent label %lu\n", (long unsigned int) whichLabel.id);
772 return;
773 }
774 // Risks: if m->labels or m->labels[i] don't exist for labels where they should,
775 // will get some out of bounds accesses.
776
777 gsc_AlleleMatrix* m = d->m;
778 if (whichGroup.num != GSC_NO_GROUP.num) { // set the labels of group members
779 do {
780
781 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
782 if (m->groups[i].num == whichGroup.num) {
783 m->labels[labelIndex][i] = setTo;
784 }
785 }
786
787 } while ((m = m->next) != NULL);
788
789 } else { // whichGroup == 0 so set the labels of all genotypes
790 do {
791
792 if (setTo == 0) {
793 memset(m->labels[labelIndex], 0, sizeof(int) * m->n_genotypes);
794 } else {
795 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
796 m->labels[labelIndex][i] = setTo;
797 }
798 }
799
800 } while ((m = m->next) != NULL);
801 }
802}
803
822 const gsc_GroupNum whichGroup,
823 const gsc_LabelID whichLabel,
824 const int byValue) {
825 GSC_ID_T labelIndex;
826 if (whichLabel.id == GSC_NO_LABEL.id || (labelIndex = gsc_get_index_of_label(d, whichLabel)) == GSC_NA_IDX) {
827 fprintf(stderr, "Nonexistent label %lu\n", (long unsigned int) whichLabel.id);
828 return;
829 }
830 // Risks: if m->labels or m->labels[i] don't exist for labels where they should,
831 // will get some out of bounds accesses.
832
833 gsc_AlleleMatrix* m = d->m;
834 if (whichGroup.num != GSC_NO_GROUP.num) { // set the labels of group members
835 do {
836
837 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
838 if (m->groups[i].num == whichGroup.num) {
839 m->labels[labelIndex][i] += byValue;
840 }
841 }
842
843 } while ((m = m->next) != NULL);
844
845 } else { // whichGroup == 0 so set the labels of all genotypes
846 do {
847
848 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
849 m->labels[labelIndex][i] += byValue;
850 }
851
852 } while ((m = m->next) != NULL);
853 }
854
855}
856
881 const gsc_GroupNum whichGroup,
882 const GSC_GLOBALX_T startIndex,
883 const gsc_LabelID whichLabel,
884 const size_t n_values,
885 const int* values) {
886 GSC_ID_T labelIndex;
887 if (whichLabel.id == GSC_NO_LABEL.id || (labelIndex = gsc_get_index_of_label(d, whichLabel)) == GSC_NA_IDX) {
888 fprintf(stderr, "Nonexistent label %lu\n", (long unsigned int) whichLabel.id);
889 return;
890 }
891
892 gsc_AlleleMatrix* m = d->m;
893 GSC_GLOBALX_T currentIndex = 0;
894 if (whichGroup.num != GSC_NO_GROUP.num) { // set the labels of group members
895 // First scan through to find firstIndex
896 do {
897
898 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
899 if (m->groups[i].num == whichGroup.num) {
900 // Update label if it is between startIndex and startIndex + n_values
901 if (currentIndex >= startIndex) {
902 m->labels[labelIndex][i] = values[currentIndex - startIndex];
903 }
904 currentIndex++;
905 if (currentIndex > startIndex && currentIndex - startIndex >= n_values) {
906 return;
907 }
908 }
909 }
910
911 } while ((m = m->next) != NULL);
912
913 } else { // whichGroup == 0 so set the labels of all genotypes
914 do {
915
916 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
917 // Update label if it is between startIndex and startIndex + n_values
918 if (currentIndex >= startIndex) {
919 m->labels[labelIndex][i] = values[currentIndex - startIndex];
920 }
921 currentIndex++;
922 if (currentIndex > startIndex && currentIndex - startIndex >= n_values) {
923 return;
924 }
925 }
926
927 } while ((m = m->next) != NULL);
928 }
929}
930
959 const gsc_GroupNum whichGroup,
960 const GSC_GLOBALX_T startIndex,
961 const size_t n_values,
962 const char** values) {
963 // this will be much improved once we can hash our names.
964
965 gsc_AlleleMatrix* m = d->m;
966 GSC_GLOBALX_T currentIndex = 0;
967 if (whichGroup.num != GSC_NO_GROUP.num) { // set the names of group members
968 // First scan through to find firstIndex
969 do {
970
971 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
972 if (m->groups[i].num == whichGroup.num) {
973 // Update name if index is between startIndex and startIndex + n_values
974 if (currentIndex >= startIndex) {
975 // clear name if it's pre-existing
976 if (m->names[i] != NULL) {
977 GSC_FREE(m->names[i]);
978 }
979
980 // save new name
981 const GSC_GLOBALX_T whichName = currentIndex - startIndex;
982 m->names[i] = gsc_malloc_wrap(sizeof(char) * (strlen(values[whichName]) + 1),GSC_TRUE);
983 strcpy(m->names[i], values[whichName]);
984 }
985 currentIndex++;
986 if (currentIndex > n_values) {
987 return;
988 }
989 }
990 }
991
992 } while ((m = m->next) != NULL);
993
994 } else { // whichGroup == 0 so set the names of all genotypes
995 do {
996
997 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
998 // Update name if it is between startIndex and startIndex + n_values
999 if (currentIndex >= startIndex) {
1000 // clear name if it's pre-existing
1001 if (m->names[i] != NULL) {
1002 GSC_FREE(m->names[i]);
1003 }
1004
1005 // save new name
1006 const GSC_GLOBALX_T whichName = currentIndex - startIndex;
1007 const int nameLen = strlen(values[whichName]);
1008 m->names[i] = gsc_malloc_wrap(sizeof(char) * (nameLen + 1),GSC_TRUE);
1009 strncpy(m->names[i], values[whichName], nameLen);
1010 }
1011 currentIndex++;
1012 if (currentIndex > n_values) {
1013 return;
1014 }
1015 }
1016
1017 } while ((m = m->next) != NULL);
1018 }
1019}
1020
1042 const char* which_marker,
1043 const char from,
1044 const char to) {
1045 GSC_GENOLEN_T nmarkers = 0;
1046 GSC_GLOBALX_T ngenos = 0;
1047 unsigned int nalleles = 0;
1048
1049 GSC_GENOLEN_T markeri;
1050 if (which_marker == NULL) {
1053
1054 while (IS_VALID_LOCATION(loc)) {
1055 for (GSC_GENOLEN_T m = 0; m < d->genome.n_markers; ++m) {
1056 if (from == loc.localAM->alleles[loc.localPos][m << 1]) {
1057 loc.localAM->alleles[loc.localPos][m << 1] = to;
1058 ++nalleles;
1059 ++ngenos;
1060 }
1061 if (from == loc.localAM->alleles[loc.localPos][(m << 1) + 1]) {
1062 loc.localAM->alleles[loc.localPos][(m << 1) + 1] = to;
1063 ++nalleles;
1064 if (loc.localAM->alleles[loc.localPos][m << 1] !=
1065 loc.localAM->alleles[loc.localPos][(m << 1) + 1]) {
1066 ++ngenos;
1067 }
1068 }
1069 }
1070
1071 loc = gsc_next_forwards(&it);
1072 }
1073
1075
1076
1077 } else if (gsc_get_index_of_genetic_marker(which_marker, d->genome, &markeri)) {
1078 nmarkers = 1;
1081 while (IS_VALID_LOCATION(loc)) {
1082 if (from == loc.localAM->alleles[loc.localPos][markeri << 1]) {
1083 loc.localAM->alleles[loc.localPos][markeri << 1] = to;
1084 ++nalleles;
1085 ++ngenos;
1086 }
1087 if (from == loc.localAM->alleles[loc.localPos][(markeri << 1) + 1]) {
1088 loc.localAM->alleles[loc.localPos][(markeri << 1) + 1] = to;
1089 ++nalleles;
1090 if (loc.localAM->alleles[loc.localPos][markeri << 1] !=
1091 loc.localAM->alleles[loc.localPos][(markeri << 1) + 1]) {
1092 ++ngenos;
1093 }
1094 }
1095
1096 loc = gsc_next_forwards(&it);
1097 }
1098
1100
1101 } else {
1102 nmarkers = 0;
1103 ngenos = 0;
1104 }
1105
1106 printf("Changed allele %c to %c %lu times across %lu markers and %lu genotypes\n",
1107 from, to, (long unsigned int)nalleles, (long unsigned int)nmarkers, (long unsigned int)ngenos);
1108}
1109
/** Count how many times an integer can be divided by 10 before reaching 0.
 *
 * For nonzero values this is the number of decimal digits (the sign is not
 * counted).
 *
 * @param i the integer to measure.
 * @return the number of decimal digits of `i`; note that this is 0 when
 * `i` is 0.
 */
int gsc_get_integer_digits(const int i) {
    int n_digits = 0;
    for (int remaining = i; remaining != 0; remaining /= 10) {
        ++n_digits;
    }
    return n_digits;
}
1123
/** Comparison function for ordering an array of pointers-to-double so that
 * the pointed-to values are in descending order.
 *
 * @param pp0 pointer to the first array element (itself a double*).
 * @param pp1 pointer to the second array element (itself a double*).
 * @return -1 if the first value is larger, 1 if it is smaller, 0 otherwise.
 */
static int gsc_helper_descending_pdouble_comparer(const void* pp0, const void* pp1) {
    const double first = **(double **)pp0;
    const double second = **(double **)pp1;
    // Each relational test yields 0 or 1, so the difference is -1, 0 or 1.
    return (first < second) - (first > second);
}
1140
/** Comparison function for ordering an array of doubles ascending.
 *
 * @param pp0 pointer to the first double.
 * @param pp1 pointer to the second double.
 * @return -1 if the first value is smaller, 1 if it is larger, 0 otherwise.
 */
static int gsc_helper_ascending_double_comparer(const void* pp0, const void* pp1) {
    const double first = *(double *)pp0;
    const double second = *(double *)pp1;
    // Each relational test yields 0 or 1, so the difference is -1, 0 or 1.
    return (first > second) - (first < second);
}
1156
/** Comparison function for ordering an array of pointers-to-double so that
 * the pointed-to values are in ascending order.
 *
 * @param pp0 pointer to the first array element (itself a double*).
 * @param pp1 pointer to the second array element (itself a double*).
 * @return -1 if the first value is smaller, 1 if it is larger, 0 otherwise.
 */
static int gsc_helper_ascending_pdouble_comparer(const void* pp0, const void* pp1) {
    const double first = **(double **)pp0;
    const double second = **(double **)pp1;
    // Each relational test yields 0 or 1, so the difference is -1, 0 or 1.
    return (first > second) - (first < second);
}
1173
/** Comparison function for ordering an array whose elements are pointers to
 * strings (char**), by the strcmp order of the strings they lead to.
 *
 * @param p0 pointer to the first array element (itself a char**).
 * @param p1 pointer to the second array element (itself a char**).
 * @return the result of strcmp on the two strings reached through p0 and p1.
 */
static int gsc_helper_indirect_alphabetical_str_comparer(const void* p0, const void* p1) {
    char** const entry0 = *(char***)p0;
    char** const entry1 = *(char***)p1;
    return strcmp(*entry0, *entry1);
}
1182
1186static int gsc_helper_mapfileunit_ascending_chr_comparer(const void* p0, const void* p1) {
1187 struct gsc_MapfileUnit s0 = *(struct gsc_MapfileUnit*)p0;
1188 struct gsc_MapfileUnit s1 = *(struct gsc_MapfileUnit*)p1;
1189 //return s0.ul - s1.ul;
1190 return (s0.chr < s1.chr) ? -1 : (s0.chr > s1.chr);
1191}
1192
1196static int gsc_helper_mapfileunit_ascending_d_comparer(const void* p0, const void* p1) {
1197 struct gsc_MapfileUnit s0 = *(struct gsc_MapfileUnit*)p0;
1198 struct gsc_MapfileUnit s1 = *(struct gsc_MapfileUnit*)p1;
1199 return (s0.pos < s1.pos) ? -1 : (s0.pos > s1.pos);
1200}
1201
1218 gsc_GenoLocation to,
1219 int* label_defaults) {
1220 if (to.localAM == from.localAM && to.localPos == from.localPos) {
1221 return;
1222 }
1223 if (to.localAM->groups[to.localPos].num != GSC_NO_GROUP.num) {
1224 fprintf(stderr,"In moving a genotype from %p:%lu to %p:%lu, the genotype at %p:%lu will be overwritten\n",
1225 from.localAM, (long unsigned int)from.localPos, to.localAM, (long unsigned int)to.localPos,
1226 to.localAM, (long unsigned int)to.localPos);
1227 --to.localAM->n_genotypes;
1228 }
1229 to.localAM->alleles[to.localPos] = from.localAM->alleles[from.localPos];
1230 from.localAM->alleles[from.localPos] = NULL;
1231
1232 to.localAM->names[to.localPos] = from.localAM->names[from.localPos];
1233 from.localAM->names[from.localPos] = NULL;
1234
1235 to.localAM->ids[to.localPos] = from.localAM->ids[from.localPos];
1236 from.localAM->ids[from.localPos] = GSC_NO_PEDIGREE;
1237
1238 to.localAM->pedigrees[0][to.localPos] = from.localAM->pedigrees[0][from.localPos];
1239 from.localAM->pedigrees[0][from.localPos] = GSC_NO_PEDIGREE;
1240 to.localAM->pedigrees[1][to.localPos] = from.localAM->pedigrees[1][from.localPos];
1241 from.localAM->pedigrees[1][from.localPos] = GSC_NO_PEDIGREE;
1242
1243 to.localAM->groups[to.localPos] = from.localAM->groups[from.localPos];
1244 from.localAM->groups[from.localPos] = GSC_NO_GROUP;
1245
1246 if (to.localAM->n_labels != from.localAM->n_labels) {
1247 fprintf(stderr,"Origin and destination when copying genotype do not have the same number of custom"
1248 " labels (n_labels). The genotype now at %p:%lu will have lost its label data\n",
1249 to.localAM, (long unsigned int)to.localPos);
1250 } else if (to.localAM->n_labels != 0 && label_defaults == NULL) {
1251 fprintf(stderr,"Label defaults must be supplied to gsc_move_genotypes or there is risk of "
1252 "corrupted label values in further use of the simulation");
1253 } else {
1254 for (GSC_ID_T i = 0; i < to.localAM->n_labels; ++i) {
1255 to.localAM->labels[i][to.localPos] = from.localAM->labels[i][from.localPos];
1256 from.localAM->labels[i][from.localPos] = label_defaults[i];
1257 }
1258 }
1259
1260 if (from.localAM != to.localAM) {
1261 --from.localAM->n_genotypes;
1262 ++to.localAM->n_genotypes;
1263 }
1264}
1265
1272 if (it->cursor.localAM == NULL) {
1274 } else if (it->cursor.localPos >= CONTIG_WIDTH) {
1275 it->cursor.localPos = 0;
1276 it->cursor.localAM = it->cursor.localAM->next;
1277 ++it->cursorAMIndex;
1278 if (it->cursor.localAM == NULL) {
1280 }
1281 }
1282 return it->cursor;
1283}
1284
1294 }
1295
1296 while (it->cursor.localAM->groups[it->cursor.localPos].num != GSC_NO_GROUP.num) {
1297
1298 // Trusts that n_genotypes is correct.
1299 if (it->cursor.localAM->n_genotypes == CONTIG_WIDTH) { // work-saver: skip this gsc_AlleleMatrix if it is already known to be full.
1300 it->cursor.localAM = it->cursor.localAM->next;
1301 ++it->cursorAMIndex;
1302 } else {
1303 ++it->cursor.localPos;
1304 }
1305
1308 }
1309 }
1310
1311 return it->cursor;
1312}
1313
1323 }
1324
1325 while (it->cursor.localAM->groups[it->cursor.localPos].num == GSC_NO_GROUP.num) {
1326 ++it->cursor.localPos;
1329 }
1330 }
1331
1332 return it->cursor;
1333}
1334
1335
1361 // Find the first gap
1362 struct gsc_GappyIterator filler = {.cursor=(gsc_GenoLocation){.localAM=d->m, .localPos=0},
1363 .cursorAMIndex=0};
1364 gsc_nextgappy_get_gap(&filler);
1365
1366 if (!GSC_IS_VALID_LOCATION(filler.cursor)) {
1367 return; // no gaps found
1368 }
1369
1370 struct gsc_GappyIterator checker = filler; // copy filler
1371 ++checker.cursor.localPos;
1372 gsc_nextgappy_get_nongap(&checker);
1373
1374 // Shuffle all candidates back
1375 while (GSC_IS_VALID_LOCATION(filler.cursor) && GSC_IS_VALID_LOCATION(checker.cursor)) {
1376 gsc_move_genotype(checker.cursor, filler.cursor, d->label_defaults);
1377
1378 ++filler.cursor.localPos;
1379 gsc_nextgappy_get_gap(&filler);
1380
1381 ++checker.cursor.localPos;
1382 gsc_nextgappy_get_nongap(&checker);
1383 }
1384
1385 // Then, free any other pre-allocated space
1386 while (GSC_IS_VALID_LOCATION(filler.cursor)) {
1387 if (filler.cursor.localAM->n_genotypes == 0) {
1388 // no genotypes after this point
1389 AlleleMatrix* previous = gsc_get_nth_AlleleMatrix(d->m, filler.cursorAMIndex - 1);
1390 if (previous != NULL) {
1391 previous->next = NULL;
1393 }
1394 filler.cursor.localAM = NULL;
1395
1396 } else {
1397 // If this gap has allocated space, clear it.
1398 if (gsc_get_alleles(filler.cursor) != NULL) {
1400 filler.cursor.localAM->alleles[filler.cursor.localPos] = NULL;
1401 }
1402 if (gsc_get_name(filler.cursor) != NULL) {
1403 GSC_FREE(gsc_get_name(filler.cursor));
1404 filler.cursor.localAM->names[filler.cursor.localPos] = NULL;
1405 }
1406 filler.cursor.localAM->ids[filler.cursor.localPos] = GSC_NO_PEDIGREE;
1409 filler.cursor.localAM->groups[filler.cursor.localPos] = GSC_NO_GROUP;
1410
1411 ++filler.cursor.localPos;
1412 gsc_nextgappy_get_gap(&filler);
1413 }
1414 }
1415}
1416
1417
1418
1419/*----------------------------------Locators---------------------------------*/
1420
1421
1445 const gsc_GroupNum group) {
1446 return gsc_create_bidirectional_iter_fromAM(d->m, group);
1447}
1448
1450 const gsc_GroupNum group) {
1451 return (gsc_BidirectionalIterator) {
1452 .am = am,
1453 .group = group,
1454 .localPos = GSC_NA_LOCALX,
1455
1456 .cachedAM = am,
1457 .cachedAMIndex = 0,
1458
1459 .atStart = 0,
1460 .atEnd = 0
1461 };
1462}
1463
1492 GSC_LOCALX_T first = 0;
1493 gsc_AlleleMatrix* firstAM = d->m;
1494 _Bool anyExist = 1;
1495
1496 // Want to know:
1497 // - is this group empty? (randomAccess should know if group size is 0)
1498 // - what is the first genotype index in this group?
1499
1500 if (group.num == GSC_NO_GROUP.num) { // scanning all genotypes
1501 while (firstAM->n_genotypes == 0) {
1502 if (firstAM->next == NULL) {
1503 // gsc_SimData is empty. Nowhere to go.
1504 anyExist = 0;
1505 } else { // Keep moving forwards through the list. Not polite enough to clean up the blank AM.
1506 firstAM = firstAM->next;
1507 }
1508 }
1509
1510 } else { // scanning a specific group
1511 _Bool exitNow = 0;
1512 while (!exitNow) {
1513
1514 // Set first, firstAM, firstAMIndex if appropriate
1515 for (GSC_LOCALX_T i = 0; i < firstAM->n_genotypes; ++i) {
1516 if (firstAM->groups[i].num == group.num) {
1517 first = i;
1518 exitNow = 1;
1519 break;
1520 }
1521 }
1522
1523 // Move along and set anyExist if appropriate
1524 if (!exitNow) {
1525 firstAM = firstAM->next;
1526 if (firstAM == NULL) {
1527 anyExist = 0;
1528 exitNow = 1;
1529 }
1530 }
1531 }
1532 }
1533
1534 gsc_GenoLocation* cache = NULL;
1535 GSC_GLOBALX_T cacheSize = 0;
1536 if (anyExist) {
1537 cacheSize = 50;
1538 cache = gsc_malloc_wrap((sizeof(gsc_GenoLocation)*cacheSize),GSC_TRUE);
1539 cache[0] = (gsc_GenoLocation) {
1540 .localAM= firstAM,
1541 .localPos = first,
1542 };
1543 for (GSC_GLOBALX_T i = 1; i < cacheSize; ++i) {
1544 cache[i] = GSC_INVALID_GENO_LOCATION;
1545 }
1546
1547 }
1548
1549 return (gsc_RandomAccessIterator) {
1550 .d = d,
1551 .group = group,
1552
1553 .largestCached = anyExist ? 0 : GSC_NA_GLOBALX,
1554 .groupSize = anyExist ? GSC_NA_GLOBALX : 0, // NA represents unknown, 0 represents empty
1555 .cacheSize = cacheSize,
1556 .cache = cache
1557 };
1558}
1559
1570 unsigned int currentIndex = 0;
1571 gsc_AlleleMatrix* am = listStart;
1572 while (currentIndex < n) {
1573 if (am->next == NULL) {
1574 return NULL;
1575 } else {
1576 am = am->next;
1577 currentIndex++;
1578 }
1579 }
1580 return am;
1581}
1582
1594 GSC_LOCALX_T first = 0;
1595 gsc_AlleleMatrix* firstAM = it->am;
1596 unsigned int firstAMIndex = 0;
1597 _Bool anyExist = 1;
1598
1599 // Want to know:
1600 // - is this group empty? (iterator should know if it is at the end as well as at the start)
1601 // - what is the first genotype index in this group?
1602
1603 if (it->group.num == GSC_NO_GROUP.num) {
1604 while (firstAM->n_genotypes == 0) {
1605 if (firstAM->next == NULL) {
1606 anyExist = 0; // gsc_SimData is empty.
1607
1608 } else { // (Not polite enough to clean up the blank AM.)
1609 firstAM = firstAM->next;
1610 firstAMIndex++;
1611 // first += 0;
1612 }
1613 }
1614
1615 // After this runs we have set firstAM, first, firstAMIndex, anyExist appropriately
1616
1617 } else { // scanning a specific group
1618
1619 _Bool exitNow = 0;
1620 while (!exitNow) {
1621
1622 // Set first, firstAM, firstAMIndex if appropriate
1623 for (GSC_LOCALX_T i = 0; i < firstAM->n_genotypes; ++i) {
1624 if (firstAM->groups[i].num == it->group.num) {
1625 first = i;
1626 exitNow = 1;
1627 break;
1628 }
1629 }
1630
1631 // Move along and set anyExist if appropriate
1632 if (!exitNow) {
1633 firstAM = firstAM->next;
1634 firstAMIndex++;
1635 if (firstAM == NULL) {
1636 first = GSC_NA_LOCALX;
1637 anyExist = 0;
1638 exitNow = 1;
1639 }
1640 }
1641 }
1642 }
1643
1644 it->localPos = first;
1645 if (anyExist) {
1646 it->atStart = 1;
1647 it->atEnd = 0;
1648 } else { // fail immediately on all further accesses. The group is empty.
1649 it->atStart = 1;
1650 it->atEnd = 1;
1651 }
1652 it->cachedAM = firstAM;
1653 it->cachedAMIndex = firstAMIndex;
1654
1655 return (gsc_GenoLocation) {
1656 .localAM = firstAM,
1657 .localPos = first
1658 };
1659}
1660
1672 GSC_LOCALX_T last = 0;
1673 gsc_AlleleMatrix* lastAM = it->am;
1674 unsigned int lastAMIndex = 0;
1675 _Bool anyExist = 1;
1676
1677 // Want to know:
1678 // - is this group empty? (iterator should know if it is at the end as well as at the start)
1679 // - what is the first genotype index in this group?
1680
1681 if (it->group.num == GSC_NO_GROUP.num) {
1682 while (lastAM->next != NULL && lastAM->next->n_genotypes != 0) {
1683 lastAM = lastAM->next;
1684 lastAMIndex++;
1685 }
1686 if (lastAMIndex > 0 || lastAM->n_genotypes > 0) {
1687 last = lastAM->n_genotypes - 1;
1688 } else {
1689 anyExist = 0;
1690 }
1691
1692 } else { // scanning a specific group
1693
1694 // Find last AM
1695 while (lastAM->next != NULL && lastAM->next->n_genotypes != 0) {
1696 lastAM = lastAM->next;
1697 lastAMIndex++;
1698 }
1699
1700 _Bool exitNow = 0;
1701 while (!exitNow) {
1702
1703 // Set first, firstAM, firstAMIndex if appropriate
1704 for (GSC_LOCALX_T i = lastAM->n_genotypes - 1; i >= 0; --i) {
1705 if (lastAM->groups[i].num == it->group.num) {
1706 last = i;
1707 exitNow = 1;
1708 break;
1709 }
1710 }
1711
1712 // Move along and set anyExist if appropriate
1713 if (!exitNow) {
1714 --lastAMIndex;
1715 lastAM = gsc_get_nth_AlleleMatrix(it->am, lastAMIndex);
1716 if (lastAM->n_genotypes == 0) {
1717 last = GSC_NA_LOCALX;
1718 anyExist = 0;
1719 exitNow = 1;
1720 }
1721 }
1722 }
1723 }
1724
1725 it->localPos = last;
1726 if (anyExist) {
1727 it->atStart = 0;
1728 it->atEnd = 1;
1729 } else { // group is empty: fail immediately on any further accesses
1730 it->atStart = 1;
1731 it->atEnd = 1;
1732 }
1733 it->cachedAM = lastAM;
1734 it->cachedAMIndex = lastAMIndex;
1735
1736 return (gsc_GenoLocation) {
1737 .localAM = lastAM,
1738 .localPos = last
1739 };
1740}
1741
1742
1763 if (it->localPos == GSC_NA_LOCALX) {
1765 }
1766
1767 if (it->atEnd) { // || validate_bidirectional_cache(it) == GSC_FALSE) { // can't use this because what if our iterator user is modifying group allocations?
1769 }
1770
1771 if (it->group.num == GSC_NO_GROUP.num) {
1772
1773 // Search for the next value.
1774 if (it->localPos + 1 < it->cachedAM->n_genotypes) {
1775 // The next value is in the same gsc_AlleleMatrix
1776 it->localPos++;
1777 it->atStart = 0;
1778 return (gsc_GenoLocation) {
1779 .localAM = it->cachedAM,
1780 .localPos = it->localPos
1781 };
1782
1783 } else {
1784 // The next value is in the next gsc_AlleleMatrix
1785 gsc_AlleleMatrix* nextAM = it->cachedAM;
1786 int nextAMIndex = it->cachedAMIndex;
1787 do {
1788 nextAM = nextAM->next;
1789 nextAMIndex++;
1790 } while (nextAM != NULL && nextAM->n_genotypes == 0);
1791
1792 if (nextAM == NULL) {
1793 // There is no further gsc_AlleleMatrix; we are at the end of the iterator.
1794 it->atEnd = 1;
1796 } else {
1797 it->cachedAM = nextAM;
1798 it->cachedAMIndex = nextAMIndex;
1799 it->localPos = 0;
1800 it->atStart = 0;
1801 return (gsc_GenoLocation) {
1802 .localAM = it->cachedAM,
1803 .localPos = 0
1804 };
1805 }
1806 }
1807
1808 } else { // We are iterating through a specific group
1809
1810 // Search for the next value
1811 while(1) {
1812 if (it->localPos + 1 < it->cachedAM->n_genotypes) {
1813 for (++it->localPos; it->localPos < it->cachedAM->n_genotypes; ++it->localPos) {
1814 if (it->cachedAM->groups[it->localPos].num == it->group.num) {
1815 it->atStart = 0;
1816 return (gsc_GenoLocation) {
1817 .localAM = it->cachedAM,
1818 .localPos = it->localPos
1819 };
1820 }
1821 }
1822 }
1823
1824 gsc_AlleleMatrix* nextAM = it->cachedAM;
1825 int nextAMIndex = it->cachedAMIndex;
1826 do {
1827 nextAM = nextAM->next;
1828 nextAMIndex++;
1829 } while (nextAM != NULL && nextAM->n_genotypes == 0);
1830
1831 if (nextAM == NULL) {
1832 // There is no further gsc_AlleleMatrix; we are at the end of the iterator.
1833 it->atEnd = 1;
1835 } else {
1836 it->cachedAM = nextAM;
1837 it->cachedAMIndex = nextAMIndex;
1838 it->localPos = 0;
1839 if (it->cachedAM->groups[it->localPos].num == it->group.num) {
1840 it->atStart = 0;
1841 return (gsc_GenoLocation) {
1842 .localAM = it->cachedAM,
1843 .localPos = it->localPos
1844 };
1845 }
1846 }
1847 }
1848
1849 }
1850}
1851
1852
1877 if (it->localPos == GSC_NA_LOCALX) {
1879 }
1880
1881 if (it->atStart) { //|| validate_bidirectional_cache(it) == GSC_FALSE) {
1883 }
1884
1885 if (it->group.num == GSC_NO_GROUP.num) {
1886
1887 // Search for the previous value.
1888 if (it->localPos > 0) {
1889 // The previous value is in the same gsc_AlleleMatrix
1890 it->localPos--;
1891 it->atEnd = 0;
1892 return (gsc_GenoLocation) {
1893 .localAM = it->cachedAM,
1894 .localPos = it->localPos
1895 };
1896
1897 } else {
1898 // The previous value is in the previous gsc_AlleleMatrix
1899 if (it->cachedAMIndex == 0) {
1900 it->atStart = 1;
1902 } else {
1903 gsc_AlleleMatrix* nextAM = it->cachedAM;
1904 int nextAMIndex = it->cachedAMIndex;
1905 do {
1906 nextAMIndex--;
1907 nextAM = gsc_get_nth_AlleleMatrix(it->am, nextAMIndex);
1908 } while (nextAM != NULL && nextAM->n_genotypes == 0);
1909
1910 if (nextAM == NULL) {
1911 it->atStart = 1;
1913 } else {
1914 it->cachedAM = nextAM;
1915 it->cachedAMIndex = nextAMIndex;
1916 it->localPos = it->cachedAM->n_genotypes - 1;
1917 it->atEnd = 0;
1918 return (gsc_GenoLocation) {
1919 .localAM = it->cachedAM,
1920 .localPos = it->localPos
1921 };
1922 }
1923 }
1924 }
1925
1926 } else { // We are iterating through a specific group
1927
1928 // Search for the next value
1929 while(1) {
1930 if (it->localPos > 0) {
1931 for (--it->localPos; it->localPos >= 0; --it->localPos) {
1932 if (it->cachedAM->groups[it->localPos].num == it->group.num) {
1933 it->atEnd = 0;
1934 return (gsc_GenoLocation) {
1935 .localAM = it->cachedAM,
1936 .localPos = it->localPos
1937 };
1938 }
1939 }
1940 }
1941
1942 if (it->cachedAMIndex == 0) {
1943 it->atStart = 1;
1944 it->localPos = 0;
1946 } else {
1947 gsc_AlleleMatrix* nextAM = it->cachedAM;
1948 int nextAMIndex = it->cachedAMIndex;
1949 do {
1950 nextAMIndex--;
1951 nextAM = gsc_get_nth_AlleleMatrix(it->am, nextAMIndex);
1952 } while (nextAM != NULL && nextAM->n_genotypes == 0);
1953
1954 if (nextAM == NULL) {
1955 it->atStart = 1;
1957 } else {
1958 it->cachedAM = nextAM;
1959 it->cachedAMIndex = nextAMIndex;
1960 it->localPos = it->cachedAM->n_genotypes - 1;
1961 if (it->cachedAM->groups[it->localPos].num == it->group.num) {
1962 it->atEnd = 0;
1963 return (gsc_GenoLocation) {
1964 .localAM = it->cachedAM,
1965 .localPos = it->localPos
1966 };
1967 }
1968 }
1969 }
1970 }
1971 }
1972}
1973
1974
1998 // Validity checks for a random access iterator: largestCached must exist,
1999 // is indeed cached and belongs to the same group
2000 /*if (it->largestCached == GSC_NA_GLOBALX ||
2001 (!GSC_IS_VALID_LOCATION(it->cache[it->largestCached]) &&
2002 (it->group.num == GSC_NO_GROUP.num ||
2003 it->group.num != gsc_get_group(it->cache[it->largestCached]).num))) {
2004 return GSC_INVALID_GENO_LOCATION;
2005 }*/
2006
2007 // Step 0: Fail immediately if we know there aren't this many candidates in the group.
2008 if (it->groupSize != GSC_NA_GLOBALX && it->groupSize <= n) {
2010 }
2011
2012 // Step 1: Check if we have it in the cache.
2013 if (n < it->cacheSize) {
2014 // 'n' is less than or equal to our current furthest cached group member.
2015
2016 if (GSC_IS_VALID_LOCATION(it->cache[n])) { return it->cache[n]; }
2017 // Otherwise we do not have it cached, but we will enter it into the cache in the next section
2018 }
2019
2020 // Step 2: The effort of actually finding the nth group member.
2021 if (it->group.num == GSC_NO_GROUP.num) {
2022 // Assuming all non-end gsc_AlleleMatrix are filled to CONTIG_WIDTH
2023 gsc_GenoLocation expectedLocation = {
2025 .localPos = n % CONTIG_WIDTH
2026 };
2027 // Check n was not too large
2028 if (expectedLocation.localAM == NULL ||
2029 expectedLocation.localAM->n_genotypes <= expectedLocation.localPos) {
2031 }
2032 return expectedLocation;
2033
2034 } else { // searching for a particular group
2035
2036 gsc_AlleleMatrix* currentAM;
2037 GSC_GLOBALX_T groupN;
2038 GSC_LOCALX_T localPos;
2039
2040 if (!GSC_IS_VALID_LOCATION(it->cache[it->largestCached])) {
2041 // Cache is invalid. You should throw out the iterator and replace with a new one.
2043 }
2044
2045 // Search forwards from largestCached
2046 currentAM = it->cache[it->largestCached].localAM;
2047 groupN = it->largestCached;
2048 localPos = it->cache[it->largestCached].localPos + 1;
2049
2050 while (1) {
2051 for (; localPos < currentAM->n_genotypes; ++localPos) {
2052 // If we found a group member, cache it and count upwards towards n
2053 if (currentAM->groups[localPos].num == it->group.num) {
2054 it->largestCached = ++groupN;
2055
2056 // Do we need to expand the cache to hold this?
2057 if (it->largestCached >= it->cacheSize) {
2058 GSC_GLOBALX_T newCacheSize = it->cacheSize;
2059 if (it->cacheSize == 0) {
2060 newCacheSize = 25;
2061 } else {
2062 newCacheSize = newCacheSize << 1;
2063 }
2064 gsc_GenoLocation* newCache = gsc_malloc_wrap(sizeof(gsc_GenoLocation)*newCacheSize,GSC_TRUE);
2065 // initialise
2066 memcpy(newCache, it->cache, sizeof(*newCache)*it->cacheSize);
2067 for (GSC_GLOBALX_T i = it->cacheSize; i < newCacheSize; ++i) {
2068 newCache[i] = GSC_INVALID_GENO_LOCATION;
2069 }
2070 // clean
2071 GSC_FREE(it->cache);
2072 it->cache = newCache;
2073 it->cacheSize = newCacheSize;
2074 }
2075
2076 // Store this additional group member.
2077 it->cache[groupN] = (gsc_GenoLocation) {
2078 .localAM = currentAM,
2079 .localPos = localPos
2080 };
2081 if (groupN == n) {
2082 return it->cache[n];
2083 }
2084 }
2085 }
2086
2087 if (currentAM->next == NULL || currentAM->next->n_genotypes == 0) {
2088 // We are at the end of the iterator and have not found n
2089 it->groupSize = groupN + 1;
2091 } else {
2092 currentAM = currentAM->next;
2093 localPos = 0;
2094 }
2095
2096 }
2097 }
2098
2099}
2100
// NOTE(review): this function's signature is not visible in this listing; the body
// uses an AlleleMatrix list head `start` and a gsc_PedigreeID `id`.
//
// Walks the list from `start` looking for the genotype whose ID is `id` and returns
// that genotype's entry in the matrix's `names` array. Returns NULL, after printing
// to stderr, when `id` is GSC_NO_PEDIGREE or when the ID cannot be found. Exits the
// program with status 1 when `start` is NULL.
2122 if (id.id == GSC_NO_PEDIGREE.id) {
2123 fprintf(stderr, "Invalid ID %lu\n", (long unsigned int)id.id);
2124 return NULL;
2125 }
2126 if (start == NULL) {
2127 fprintf(stderr, "Invalid nonexistent allelematrix\n"); exit(1);
2128 }
2129 const gsc_AlleleMatrix* m = start;
2130
2131 while (1) {
2132 // try to find our id. Does this AM potentially have the right range for it?
2133 // If we're not sure, because either of the endpoints does not have its ID tracked,
2134 // check anyway
2135 if (m->n_genotypes != 0 && (id.id >= m->ids[0].id || m->ids[0].id == GSC_NO_PEDIGREE.id) &&
2136 (id.id <= m->ids[m->n_genotypes - 1].id || m->ids[m->n_genotypes - 1].id == GSC_NO_PEDIGREE.id)) {
2137
// NOTE(review): the statement that declares and computes `index` is missing from
// this listing at this point.
2139
// NOTE(review): this test lets index == n_genotypes through to the names[] read
// below. Presumably the lookup signals failure with a value larger than any valid
// index; confirm against the missing statement.
2140 if (index > m->n_genotypes) {
2141 // search failed
2142 if (m->next == NULL) {
2143 fprintf(stderr, "Could not find the ID %lu: did you prematurely delete this genotype?\n", (long unsigned int)id.id);
2144 return NULL;
2145 } else {
2146 m = m->next;
2147 continue;
2148 }
2149 }
2150
2151 return m->names[index];
2152
2153 }
2154
// This matrix's ID range excludes `id`: try the next one, or give up at the end.
2155 if (m->next == NULL) {
2156 fprintf(stderr, "Could not find the ID %lu: did you prematurely delete this genotype?\n", (long unsigned int)id.id);
2157 return NULL;
2158 } else {
2159 m = m->next;
2160 }
2161 }
2162}
2163
2188 const gsc_PedigreeID id,
2189 gsc_PedigreeID output[static 2]) {
// NOTE(review): the first line of this function's signature is not visible in this
// listing; the body uses an AlleleMatrix list head `start`.
//
// Looks up the genotype with ID `id` and copies its two recorded parent IDs into
// `output`. Return codes, as written below:
//   0 - at least one parent ID is known; both entries of `output` were written
//   1 - `id` is GSC_NO_PEDIGREE, or neither parent of the genotype is known
//   2 - no matrix in the list covers `id` (message printed to stderr)
// Exits the program with status 1 when `start` is NULL.
2190 if (id.id == GSC_NO_PEDIGREE.id) {
2191 return 1;
2192 }
2193 if (start == NULL) {
2194 fprintf(stderr, "Invalid nonexistent allelematrix\n"); exit(1);
2195 }
2196 const gsc_AlleleMatrix* m = start;
2197 while (1) {
2198 // try to find our id. Does this AM have the right range for it?
2199 if (m->n_genotypes != 0 && id.id >= m->ids[0].id && id.id <= m->ids[m->n_genotypes - 1].id) {
2200 // perform binary search to find the exact index.
// NOTE(review): the statement that declares and computes `index` is missing from
// this listing at this point.
2202
2203 if (index == GSC_NA_LOCALX) {
2204 // search failed
2205 /*if (m->next == NULL) {
2206 fprintf(stderr, "Unable to locate ID %d in simulation memory (genotype has likely been deleted): pedigree past this point cannot be determined\n", id.id);
2207 return 2;
2208 } else {
2209 m = m->next;
2210 }*/
// NOTE(review): with the block above commented out, `m` is not advanced before
// this `continue`, so the next pass re-tests the same matrix. Unless the missing
// lookup can give a different answer on a retry, this looks like it never
// terminates for an ID that falls inside a matrix's range but is absent from it.
// Confirm, and consider advancing `m` or returning 2 here.
2211 continue;
2212 } else {
2213
2214 if (m->pedigrees[0][index].id != GSC_NO_PEDIGREE.id || m->pedigrees[1][index].id != GSC_NO_PEDIGREE.id) {
2215 output[0] = m->pedigrees[0][index];
2216 output[1] = m->pedigrees[1][index];
2217 return 0;
2218 }
2219 return 1; // if neither parent's id is known
2220 }
2221
2222 }
2223
// This matrix's ID range excludes `id`: try the next one, or give up at the end.
2224 if (m->next == NULL) {
2225 fprintf(stderr, "Unable to locate ID %lu in simulation memory (genotype has likely been deleted): pedigree past this point cannot be determined\n", (long unsigned int)id.id);
2226 return 2;
2227 } else {
2228 m = m->next;
2229 }
2230 }
2231}
2232
2249 const size_t n_names,
2250 const char** names,
2251 gsc_PedigreeID* output) {
2252 if (start == NULL || (start->n_genotypes <= 0 && start->next == NULL)) {
2253 fprintf(stderr,"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2254 return;
2255 }
2256 if (n_names < 1) {
2257 fprintf(stderr,"Invalid n_names parameter: Search list length must be positive\n");
2258 return;
2259 }
2260
2261 _Bool found;
2262 const gsc_AlleleMatrix* m;
2263
2264 for (size_t i = 0; i < n_names; ++i) {
2265 found = 0;
2266 output[i] = GSC_NO_PEDIGREE;
2267 m = start;
2268 while (1) {
2269 // try to identify the name in this AM
2270 for (GSC_LOCALX_T j = 0; j < m->n_genotypes; ++j) {
2271 if (strcmp(m->names[j], names[i]) == 0) {
2272 found = 1;
2273 output[i] = m->ids[j];
2274 break;
2275 }
2276 }
2277
2278 if (found) {
2279 break;
2280 }
2281 if ((m = m->next) == NULL) {
2282 fprintf(stderr, "Didn't find the name %s\n", names[i]);
2283 }
2284 }
2285 }
2286}
2287
2303 const gsc_PedigreeID parent1id,
2304 const gsc_PedigreeID parent2id) {
2305 if (start == NULL || (start->n_genotypes <= 0 && start->next == NULL)) {
2306 fprintf(stderr,"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2307 return GSC_NA_GLOBALX;
2308 }
2309 const gsc_AlleleMatrix* m = start;
2310 GSC_GLOBALX_T total_j = 0;
2311
2312 while (1) {
2313 // try to identify the child in this AM
2314 for (GSC_LOCALX_T j = 0; j < m->n_genotypes; ++j, ++total_j) {
2315 if ((parent1id.id == m->pedigrees[0][j].id && parent2id.id == m->pedigrees[1][j].id) ||
2316 (parent1id.id == m->pedigrees[1][j].id && parent2id.id == m->pedigrees[0][j].id)) {
2317 return total_j;
2318 }
2319 }
2320
2321 if ((m = m->next) == NULL) {
2322 fprintf(stderr, "Didn't find the child of %lu & %lu\n",
2323 (long unsigned int)parent1id.id, (long unsigned int)parent2id.id);
2324 return GSC_NA_GLOBALX;
2325 }
2326 }
2327}
2328
2343 if (name == NULL) {
2344 return GSC_NA_GLOBALX;
2345 }
2346 if (start == NULL || (start->n_genotypes <= 0 && start->next == NULL)) {
2347 fprintf(stderr,"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2348 return GSC_NA_GLOBALX;
2349 }
2350 const gsc_AlleleMatrix* m = start;
2351 GSC_GLOBALX_T total_j = 0;
2352
2353 while (1) {
2354 // try to identify the child in this AM
2355 for (GSC_LOCALX_T j = 0; j < m->n_genotypes; ++j, ++total_j) {
2356 if (m->names[j] != NULL && strcmp(m->names[j], name) == 0) {
2357 return total_j;
2358 }
2359 }
2360
2361 if ((m = m->next) == NULL) {
2362 fprintf(stderr, "Didn't find the name %s\n", name);
2363 return GSC_NA_GLOBALX;
2364 }
2365 }
2366}
2367
2380 const GSC_GLOBALX_T index) {
2381 if (start == NULL) {
2382 fprintf(stderr,"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2383 return GSC_NO_PEDIGREE;
2384 }
2385 const gsc_AlleleMatrix* m = start;
2386 GSC_GLOBALX_T total_j = 0;
2387
2388 while (1) {
2389 if (total_j == index) {
2390 return m->ids[0];
2391 } else if (total_j < index && total_j + m->n_genotypes > index) {
2392 return m->ids[index - total_j];
2393 }
2394 total_j += m->n_genotypes;
2395
2396 if ((m = m->next) == NULL) {
2397 fprintf(stderr, "Didn't find the index %lu\n", (long unsigned int) index);
2398 return GSC_NO_PEDIGREE;
2399 }
2400 }
2401}
2402
2419 const GSC_GLOBALX_T index) {
2420 if (start == NULL) {
2421 fprintf(stderr, "Invalid nonexistent allelematrix\n");
2422 return NULL;
2423 }
2424 const gsc_AlleleMatrix* m = start;
2425 GSC_GLOBALX_T total_j = 0;
2426
2427 while (1) {
2428 if (total_j == index) {
2429 return m->alleles[0];
2430 } else if (total_j < index && total_j + m->n_genotypes > index) {
2431 return m->alleles[index - total_j];
2432 }
2433 total_j += m->n_genotypes;
2434
2435 if ((m = m->next) == NULL) {
2436 fprintf(stderr, "Didn't find the index %lu\n", (long unsigned int) index);
2437 return NULL;
2438 }
2439 }
2440}
2441
2442
2443
2444/*-----------------------------------Groups----------------------------------*/
2445
2460 const size_t list_len,
2461 const gsc_GroupNum* grouplist) {
2462
2463 // Find the first group in the list that exists. In most use cases this will be the
2464 // first group in the list, so not too much of a performance penalty.
2465 gsc_GroupNum outGroup = GSC_NO_GROUP;
2466 size_t i = 0;
2467 for (; i < list_len; ++i) {
2468 gsc_GroupNum candidate = grouplist[i];
2470 gsc_GenoLocation testloc = gsc_next_forwards(&testit);
2472 if (GSC_IS_VALID_LOCATION(testloc)) {
2473 outGroup = candidate;
2474 break;
2475 }
2476 }
2477
2478 int remaininglistlen = list_len - i;
2479 if (remaininglistlen < 2) {
2480 return outGroup;
2481 } else if (remaininglistlen == 2) {
2482 if (grouplist[i].num == grouplist[i+1].num) {
2483 return outGroup;
2484 }
2487 int anyFound = GSC_IS_VALID_LOCATION(loc);
2488
2489 while (GSC_IS_VALID_LOCATION(loc)) {
2490 gsc_set_group(loc,outGroup);
2491 loc = gsc_next_forwards(&it);
2492 }
2493
2494 if (anyFound) {
2495 d->n_groups--;
2496 }
2498 return outGroup;
2499
2500 } else {
2501 GSC_CREATE_BUFFER(isDuplicate,_Bool,remaininglistlen);
2502 memset(isDuplicate, 0, sizeof(_Bool)*remaininglistlen);
2503 for (size_t ii = i; ii < list_len; ++ii) {
2504 for (size_t jj = ii+1; jj < list_len; ++jj) {
2505 if (grouplist[ii].num == grouplist[jj].num) {
2506 isDuplicate[jj-i] = 1;
2507 }
2508 }
2509 }
2510
2511 GSC_CREATE_BUFFER(anyFound,_Bool,remaininglistlen);
2512 memset(anyFound, 0, sizeof(_Bool)*remaininglistlen);
2513
2515 gsc_GroupNum cachedgroup = GSC_NO_GROUP; // just for speedier lookups. Groups tend to be stored contiguous in most simulations.
2517
2518 while (GSC_IS_VALID_LOCATION(loc)) {
2519 if (gsc_get_group(loc).num == cachedgroup.num) {
2520 gsc_set_group(loc,outGroup);
2521 } else {
2522 for (size_t k = i+1; k < list_len; ++k) {
2523 if (gsc_get_group(loc).num == grouplist[k].num) {
2524 gsc_set_group(loc,outGroup);
2525 cachedgroup = grouplist[k];
2526 anyFound[k-i] = 1;
2527 break;
2528 }
2529 }
2530 }
2531
2532 loc = gsc_next_forwards(&it);
2533 }
2534
2535 size_t groupsgone = 0;
2536 for (size_t j = 0; j < remaininglistlen; ++j) {
2537 if (!isDuplicate[j] && anyFound[j]) {
2538 groupsgone++;
2539 }
2540 }
2541 d->n_groups -= groupsgone;
2543 GSC_DELETE_BUFFER(anyFound);
2544 GSC_DELETE_BUFFER(isDuplicate);
2545 return outGroup;
2546 }
2547}
2548
2566 const size_t index_list_len,
2567 const GSC_GLOBALX_T* genotype_indexes) {
// NOTE(review): the first line of this function's signature is not visible in this
// listing; the body uses a simulation handle `d`.
//
// Moves the genotypes at the given positions into one newly numbered group and
// returns that group number. Positions that do not resolve to a valid location are
// counted and reported on stderr. The group counter is incremented only if at least
// one position was valid, but the new group number is returned either way.
// Returns GSC_NO_GROUP when the list is empty.
2568 if (index_list_len < 1) {
2569 fprintf(stderr,"Invalid index_list_len value: length of allocation list must be at least 1\n");
2570 return GSC_NO_GROUP;
2571 }
2572
2573 gsc_GroupNum newGroup = gsc_get_new_group_num(d);
// NOTE(review): the statement that creates the iterator `it` is missing from this
// listing at this point.
2575 size_t invalidLocations = 0;
2576 for (size_t i = 0; i < index_list_len; ++i) {
2577 gsc_GenoLocation loc = gsc_next_get_nth(&it, genotype_indexes[i]);
2578 if (GSC_IS_VALID_LOCATION(loc)) {
2579 gsc_set_group(loc,newGroup);
2580 } else {
2581 invalidLocations++;
2582 }
2583 }
2584
2585 if (invalidLocations > 0) {
2586 fprintf(stderr,"%lu indexes were invalid\n",(long unsigned int)invalidLocations);
2587 }
// Only count a new group when at least one genotype was actually moved into it.
2588 if (invalidLocations < index_list_len) {
2589 d->n_groups++;
2590 }
2591
// NOTE(review): one statement is missing from this listing here, presumably the
// release of the iterator; confirm.
2593 return newGroup;
2594}
2595
2618 const gsc_GroupNum group,
2619 const gsc_LabelID whichLabel,
2620 const int valueToSplit) {
// NOTE(review): the first line of this function's signature is not visible in this
// listing; the body uses a simulation handle `d`.
//
// Moves every visited genotype whose value for label `whichLabel` equals
// `valueToSplit` into one newly numbered group. Returns the new group number, or
// GSC_NO_GROUP when the label does not exist (message on stderr) or when no
// genotype matched.
2621 GSC_ID_T labelix;
2622 if (whichLabel.id == GSC_NO_LABEL.id || (labelix = gsc_get_index_of_label(d, whichLabel)) == GSC_NA_IDX) {
2623 fprintf(stderr, "Nonexistent label %lu\n", (long unsigned int)whichLabel.id);
2624 return GSC_NO_GROUP;
2625 }
2626
2627 gsc_GroupNum newGroup = gsc_get_new_group_num(d);
2628 _Bool anyFound = 0;
2629
// NOTE(review): the statements that create the iterator `it` and fetch the first
// location `loc` are missing from this listing at this point. Presumably the
// iterator is restricted to `group`; confirm.
2632 while (GSC_IS_VALID_LOCATION(loc)) {
2633 if (gsc_get_label_value(loc,labelix) == valueToSplit) {
2634 gsc_set_group(loc,newGroup);
2635 anyFound = 1;
2636 }
2637
2638 loc = gsc_next_forwards(&it);
2639 }
2640
// The group counter only grows when the new group actually received a member.
2641 if (anyFound) {
2642 d->n_groups++;
2643 return newGroup;
2644 } else {
2645 return GSC_NO_GROUP;
2646 }
2647
2648}
2649
2674 const gsc_GroupNum group,
2675 const gsc_LabelID whichLabel,
2676 const int valueLowBound,
2677 const int valueHighBound) {
// NOTE(review): the first line of this function's signature is not visible in this
// listing; the body uses a simulation handle `d`.
//
// Moves every visited genotype whose value for label `whichLabel` lies in the
// inclusive range [valueLowBound, valueHighBound] into one newly numbered group.
// Returns the new group number, or GSC_NO_GROUP when the label does not exist,
// when the range is empty (low > high), or when no genotype matched. The first two
// cases print a message to stderr.
2678 GSC_ID_T labelix;
2679 if (whichLabel.id == GSC_NO_LABEL.id || (labelix = gsc_get_index_of_label(d, whichLabel)) == GSC_NA_IDX) {
2680 fprintf(stderr, "Nonexistent label %lu\n", (long unsigned int)whichLabel.id);
2681 return GSC_NO_GROUP;
2682 }
2683 if (valueLowBound > valueHighBound) {
2684 fprintf(stderr, "Empty range %d to %d: no group created\n", valueLowBound, valueHighBound);
2685 return GSC_NO_GROUP;
2686 }
2687
2688 gsc_GroupNum newGroup = gsc_get_new_group_num(d);
2689 _Bool anyFound = 0;
2690
// NOTE(review): the statements that create the iterator `it` and fetch the first
// location `loc` are missing from this listing at this point. Presumably the
// iterator is restricted to `group`; confirm.
2693 while (GSC_IS_VALID_LOCATION(loc)) {
2694 if (gsc_get_label_value(loc,labelix) >= valueLowBound &&
2695 gsc_get_label_value(loc,labelix) <= valueHighBound) {
2696 gsc_set_group(loc,newGroup);
2697 anyFound = 1;
2698 }
2699
2700 loc = gsc_next_forwards(&it);
2701 }
2702
// The group counter only grows when the new group actually received a member.
2703 if (anyFound) {
2704 d->n_groups++;
2705 return newGroup;
2706 } else {
2707 return GSC_NO_GROUP; // no genotype had a label value inside the range
2708 }
2709}
2710
2711
2734 const gsc_GroupNum group_id,
2735 void* somequality_data,
2736 gsc_GroupNum (*somequality_tester)(gsc_GenoLocation,
2737 void*,
2738 size_t,
2739 size_t,
2740 gsc_GroupNum*),
2741 size_t maxentries_results,
2742 gsc_GroupNum* results) {
// NOTE(review): the first line of this function's signature is not visible in this
// listing; the body uses a simulation handle `d`.
//
// Generic driver for "split a group by some quality". For every visited genotype it
// calls `somequality_tester(loc, somequality_data, splitgroupsize, subgroupsfound,
// outgroups)`. If the tester returns an existing group number the genotype is moved
// there; if it returns GSC_NO_GROUP a fresh, unused group number is obtained, recorded
// in `outgroups`, and the genotype is moved to that.
// Up to `maxentries_results` of the created group numbers are copied to `results`
// (a warning is printed if the list had to be truncated). Returns the number of
// groups created, or 0 if `group_id` has no members.
2743 // Access existing groups (to be used to find unused group numbers,
2744 // and to find maximum number of groups we'd be able to create)
2745 GSC_CREATE_BUFFER(currentgroups,gsc_GroupNum,d->n_groups);
2746 GSC_CREATE_BUFFER(currentsizes,GSC_GLOBALX_T,d->n_groups);
2747 size_t n_groups = gsc_get_existing_group_counts(d, currentgroups, currentsizes);
2748 size_t bookmark = 0;
2749 gsc_GroupNum nextgroup = GSC_NO_GROUP;
2750
2751 // splitgroupsize is size_t not GLOBALX_T because it will be used as the maximum number of output
2752 // groups that could be produced, not used to operate on candidates in the group. (By default though
2753 // GSC_GLOBALX_T is an alias of size_t so it makes no difference).
2754 size_t splitgroupsize = 0;
2755 for (size_t i = 0; i < n_groups; ++i) {
2756 if (currentgroups[i].num == group_id.num) {
2757 splitgroupsize = currentsizes[i];
2758 //GSC_FREE(currentsizes);
2759 break;
2760 }
2761 }
// NOTE(review): this early return is not preceded by GSC_DELETE_BUFFER for
// `currentgroups` or `currentsizes`, unlike the normal path below. If
// GSC_CREATE_BUFFER can allocate from the heap, both buffers leak here; confirm
// against the macro definition.
2762 if (splitgroupsize == 0) {
2763 return 0;
2764 }
2765
2766 GSC_DELETE_BUFFER(currentsizes);
2767 size_t subgroupsfound = 0;
// Each member could end up in a group of its own, so the member count bounds the
// number of groups that can be created.
2768 GSC_CREATE_BUFFER(outgroups,gsc_GroupNum,splitgroupsize);
2769
// NOTE(review): the statements that create the iterator `it` and fetch the first
// location `loc` are missing from this listing at this point.
2772 while (GSC_IS_VALID_LOCATION(loc)) {
2773 // Return group number if it should be assigned to an already-extant group. Otherwise return GSC_NO_GROUP and this generic caller function will allocate it one.
2774 gsc_GroupNum assignedgroup = somequality_tester(loc, somequality_data,
2775 splitgroupsize, subgroupsfound, outgroups);
2776
2777 if (assignedgroup.num == GSC_NO_GROUP.num) {
2778 nextgroup = gsc_get_next_free_group_num(n_groups,currentgroups,&bookmark,nextgroup);
2779 assignedgroup = nextgroup;
2780 outgroups[subgroupsfound] = nextgroup;
2781 subgroupsfound++;
2782 }
2783
2784 gsc_set_group(loc,assignedgroup);
2785
2786 loc = gsc_next_forwards(&it);
2787 }
2788
2789 GSC_DELETE_BUFFER(currentgroups);
// Adjust the group counter by the number of groups created minus one.
// Presumably the minus one accounts for the original group having been emptied; confirm.
2790 d->n_groups += subgroupsfound - 1;
2791
2792 if (maxentries_results < subgroupsfound) {
2793 memcpy(results,outgroups,sizeof(gsc_GroupNum)*maxentries_results);
2794 fprintf(stderr,"Output vector size is not large enough to hold all created groups: "
2795 " output list of gsc_GroupNums has been truncated\n");
2796 } else {
2797 memcpy(results,outgroups,sizeof(gsc_GroupNum)*subgroupsfound);
2798 }
2799 GSC_DELETE_BUFFER(outgroups);
2800 return subgroupsfound;
2801}
2802
2804 void* datastore,
2805 size_t maxgroups,
2806 size_t groupsfound,
2807 gsc_GroupNum* results,
2808 gsc_PedigreeID (*getparent)(gsc_GenoLocation)) {
2809 gsc_PedigreeID* familyidentities = (gsc_PedigreeID*) datastore;
2810
2811 for (size_t j = 0; j < groupsfound; ++j) {
2812 if (getparent(loc).id == familyidentities[j].id) {
2813 return results[j];
2814 }
2815 }
2816
2817 if (groupsfound > maxgroups) {
2818 fprintf(stderr, "Attempted to split into more groups than caller deemed possible. "
2819 "There is a bug in the simulation tool if you can reach this state.");
2820 return results[maxgroups-1]; // allocate all to the last group, possibly incorrectly.
2821 }
2822
2823 familyidentities[groupsfound] = getparent(loc);
2824 return GSC_NO_GROUP;
2825}
2826
2828 void* datastore,
2829 size_t maxgroups,
2830 size_t groupsfound,
2831 gsc_GroupNum* results) {
// Thin wrapper that forwards all of its arguments to the shared half-sib template.
// NOTE(review): the first signature line and the line carrying the template's final
// argument (the parent-getter callback) are missing from this listing, so which
// parent this wrapper selects cannot be seen here.
2832 return gsc_helper_split_by_quality_halfsibtemplate(loc,datastore,maxgroups,groupsfound,results,
2834}
2835
2837 void* datastore,
2838 size_t maxgroups,
2839 size_t groupsfound,
2840 gsc_GroupNum* results) {
// Thin wrapper that forwards all of its arguments to the shared half-sib template.
// NOTE(review): the first signature line and the line carrying the template's final
// argument (the parent-getter callback) are missing from this listing, so which
// parent this wrapper selects cannot be seen here.
2841 return gsc_helper_split_by_quality_halfsibtemplate(loc,datastore,maxgroups,groupsfound,results,
2843}
2844
2879 const gsc_GroupNum group_id,
2880 const int parent,
2881 size_t maxentries_results,
2882 gsc_GroupNum* results) {
// NOTE(review): the first line of this function's signature is not visible in this
// listing; the body uses a simulation handle `d`.
//
// Splits `group_id` through gsc_scaffold_split_by_somequality, selecting the tester
// according to `parent`, which must be 1 or 2. Returns the value the scaffold
// returns, or 0 (with a message on stderr) when `parent` is invalid.
2883 if (!(parent == 1 || parent == 2)) {
2884 fprintf(stderr, "Value error: `parent` must be 1 or 2.");
// NOTE(review): `results` is a by-value pointer parameter, so this assignment
// changes only the local copy; the caller's pointer and array are unaffected.
2885 results = NULL;
2886 return 0;
2887 }
2888
2889 //gsc_PedigreeID* familyidentities = gsc_malloc_wrap(sizeof(gsc_PedigreeID)*maxgroups);
// Scratch array handed to the tester through the scaffold's data pointer, with one
// slot per group member.
2890 GSC_GLOBALX_T maxgroups = gsc_get_group_size(d, group_id); // sadinefficient we have to do this
2891 GSC_CREATE_BUFFER(familyidentities,gsc_PedigreeID,maxgroups);
2892
2893 size_t gcount;
// NOTE(review): in both branches the line that names the tester callback is
// missing from this listing.
2894 if (parent == 1) {
2895 gcount = gsc_scaffold_split_by_somequality(d, group_id, (void*)familyidentities,
2897 maxentries_results, results);
2898 } else {
2899 gcount = gsc_scaffold_split_by_somequality(d, group_id, (void*)familyidentities,
2901 maxentries_results, results);
2902 }
2903
2904 GSC_DELETE_BUFFER(familyidentities);
2905 return gcount;
2906}
2907
2909 void* datastore,
2910 size_t maxgroups,
2911 size_t groupsfound,
2912 gsc_GroupNum* results) {
2913 gsc_PedigreeID** familyidentities = (gsc_PedigreeID**) datastore;
2914
2915 for (size_t j = 0; j < groupsfound; ++j) {
2916 if (gsc_get_first_parent(loc).id == familyidentities[0][j].id &&
2917 gsc_get_second_parent(loc).id == familyidentities[1][j].id) {
2918 return results[j];
2919 }
2920 }
2921
2922 if (groupsfound > maxgroups) {
2923 fprintf(stderr, "Attempted to split into more groups than caller deemed possible. "
2924 "There is a bug in the simulation tool if you can reach this state.");
2925 return results[maxgroups-1]; // allocate all to the last group, possibly incorrectly.
2926 }
2927
2928 familyidentities[0][groupsfound] = gsc_get_first_parent(loc);
2929 familyidentities[1][groupsfound] = gsc_get_second_parent(loc);
2930 return GSC_NO_GROUP;
2931}
2932
2961 const gsc_GroupNum group_id,
2962 size_t maxentries_results,
2963 gsc_GroupNum* results) {
// NOTE(review): the first line of this function's signature is not visible in this
// listing; the body uses a simulation handle `d`.
//
// Splits `group_id` through gsc_scaffold_split_by_somequality, handing it a pair
// of scratch arrays (one slot per group member each) as the tester's data.
// Returns the value the scaffold returns, or 0 without doing anything when the
// group has fewer than two members.
2964 gsc_PedigreeID* familyidentities[2];
2965 GSC_GLOBALX_T maxgroups = gsc_get_group_size(d, group_id); // sadinefficient we have to do this
2966 if (maxgroups < 2) {
2967 return 0;
2968 }
2969
2970 GSC_CREATE_BUFFER(p1identity,gsc_PedigreeID,maxgroups);
2971 GSC_CREATE_BUFFER(p2identity,gsc_PedigreeID,maxgroups);
2972 familyidentities[0] = p1identity;
2973 familyidentities[1] = p2identity;
2974
// NOTE(review): the line that names the tester callback is missing from this listing.
2975 size_t out = gsc_scaffold_split_by_somequality(d, group_id, (void*)familyidentities,
2977 maxentries_results, results);
2978
2979 GSC_DELETE_BUFFER(p1identity);
2980 GSC_DELETE_BUFFER(p2identity);
2981
2982 return out;
2983}
2984
2986 void* datastore,
2987 size_t maxgroups,
2988 size_t groupsfound,
2989 gsc_GroupNum* results) {
2990 return GSC_NO_GROUP;
2991}
2992
3018 const gsc_GroupNum group_id,
3019 size_t maxentries_results,
3020 gsc_GroupNum* results) {
// Delegates to gsc_scaffold_split_by_somequality with no tester data (NULL) and
// returns whatever the scaffold returns.
// NOTE(review): the first signature line and the line that names the tester
// callback are missing from this listing.
3021 // **individuate** (verb): to make individuals of.
3022 // yeah sorry.
3023 return gsc_scaffold_split_by_somequality(d, group_id, NULL,
3025 maxentries_results, results);
3026}
3027
3028
3052 const gsc_GroupNum group_id) {
// NOTE(review): the first line of this function's signature is not visible in this
// listing; the body uses a simulation handle `d`.
//
// Moves floor(size/2) members of `group_id`, picked by shuffling the member
// positions with the simulation's random generator, into one newly numbered group
// and returns that group number. Returns GSC_NO_GROUP, with a message on stderr,
// when the group has fewer than two members.
3053 // get the shuffle to be our even allocations
3054 GSC_GLOBALX_T size = gsc_get_group_size(d, group_id);
3055 if (size < 2) {
3056 if (size < 1) {
3057 fprintf(stderr,"Group %lu does not exist\n", (long unsigned int) group_id.num);
3058 } else {
3059 fprintf(stderr,"Group %lu has only one member so can't be split\n", (long unsigned int) group_id.num);
3060 }
3061 return GSC_NO_GROUP;
3062 }
3063
3064 GSC_GLOBALX_T even_half = size / 2;
// allocations[] starts as the identity list 0..size-1 of member positions. The
// shuffle call below is given `even_half` as its last argument, and only the first
// `even_half` entries are used afterwards.
3065 GSC_CREATE_BUFFER(allocations,GSC_GLOBALX_T,size);
3066 for (GSC_GLOBALX_T i = 0; i < size; ++i) {
3067 allocations[i] = i;
3068 }
3069 gsc_shuffle_up_to(&d->rng, allocations, sizeof(allocations[0]), size, even_half);
3070
3071 gsc_GroupNum new_group = gsc_get_new_group_num(d);
3072
// NOTE(review): the statement that creates the iterator `it` is missing from this
// listing at this point.
3074 for (GSC_GLOBALX_T i = 0; i < even_half; ++i) {
3075 gsc_GenoLocation loc = gsc_next_get_nth(&it,allocations[i]);
3076 if (GSC_IS_VALID_LOCATION(loc)) {
3077 gsc_set_group(loc,new_group);
3078 }
3079 }
3080
3081 GSC_DELETE_BUFFER(allocations);
// NOTE(review): one statement is missing from this listing here, presumably the
// release of the iterator; confirm.
3083
3084 d->n_groups++;
3085 return new_group;
3086}
3087
3088
3107 const gsc_GroupNum group_id,
3108 void* someallocator_data,
3109 gsc_GroupNum (*someallocator)(gsc_GenoLocation,
3110 gsc_SimData*,
3111 void*,
3112 size_t,
3113 size_t*,
3114 gsc_GroupNum*),
3115 size_t n_outgroups,
3116 gsc_GroupNum* outgroups) {
// NOTE(review): the first line of this function's signature is not visible in this
// listing; the body uses a simulation handle `d`.
//
// Generic driver for "distribute a group over n new groups". It first fills
// `outgroups` with `n_outgroups` fresh group numbers, then calls
// `someallocator(loc, d, someallocator_data, n_outgroups, &subgroupsfound, outgroups)`
// for every visited genotype. The genotype is moved to the group the allocator
// returns; if the allocator returns GSC_NO_GROUP the genotype is left where it is and
// counted as a failure (reported on stderr at the end).
// Returns `subgroupsfound` as last set by the allocator.
3117
3118 // get the n group numbers
3119 gsc_get_n_new_group_nums(d, n_outgroups, outgroups);
3120
3121 size_t subgroupsfound = 0;
3122 GSC_GLOBALX_T allocationfailures = 0;
3123
// NOTE(review): the statements that create the iterator `it` and fetch the first
// location `loc` are missing from this listing at this point.
3126 while (GSC_IS_VALID_LOCATION(loc)) {
3127 gsc_GroupNum assignedgroup = someallocator(loc, d, someallocator_data,
3128 n_outgroups, &subgroupsfound, outgroups);
3129 if (assignedgroup.num != GSC_NO_GROUP.num) {
3130 gsc_set_group(loc,assignedgroup);
3131 } else {
3132 allocationfailures++;
3133 }
3134
3135 loc = gsc_next_forwards(&it);
3136 }
3137
// The counter is only adjusted when more than one subgroup was reported, which
// keeps the unsigned subtraction from wrapping when subgroupsfound is 0.
3138 if (subgroupsfound > 1) {
3139 d->n_groups += subgroupsfound - 1;
3140 }
3141 if (allocationfailures > 0) {
3142 fprintf(stderr,"While splitting group %lu, %lu allocations to new groups failed so they remain"
3143 " in the original group\n",
3144 (long unsigned int) group_id.num, (long unsigned int) allocationfailures);
3145 }
3146 return subgroupsfound;
3147
3148}
3149
3150
3152 gsc_SimData* d,
3153 void* datastore,
3154 size_t n_outgroups,
3155 size_t* subgroupsfound,
3156 gsc_GroupNum* outgroups) {
3157 GSC_GLOBALX_T* cumulative_counts = (GSC_GLOBALX_T*) datastore;
3158 *subgroupsfound = n_outgroups;
3159 // type note: may misbehave with large numbers because is just designed for ints
3160 int randpos = rnd_pcg_range(&d->rng,0,cumulative_counts[n_outgroups-1] - 1);
3161
3162 gsc_GroupNum chosengroup = GSC_NO_GROUP;
3163 size_t j = 0;
3164 for (; j < n_outgroups; ++j) {
3165 if (randpos < cumulative_counts[j]) {
3166 chosengroup = outgroups[j];
3167 break;
3168 }
3169 }
3170 for (; j < n_outgroups; ++j) {
3171 cumulative_counts[j]--;
3172 }
3173 return chosengroup;
3174}
3175
3198 const gsc_GroupNum group_id,
3199 const size_t n,
3200 gsc_GroupNum* results) {
3201 if (n <= 1) {
3202 fprintf(stderr, "Invalid n value: number of fractions into which to split group must be at least 2\n");
3203 return 0;
3204 }
3205
3206 GSC_GLOBALX_T size = gsc_get_group_size(d, group_id); // sadinefficient we have to do this.
3207
3208 // get the shuffle to be our even allocations
3209 GSC_GLOBALX_T each_size = size / n;
3210 GSC_GLOBALX_T extra = size % n;
3212 for (size_t i = 0; i < n; ++i) {
3213 boxes[i] = each_size;
3214 if (i < extra) {
3215 boxes[i]++;
3216 }
3217 if (i > 0) {
3218 boxes[i] += boxes[i-1];
3219 }
3220 }
3221
3222 size_t out;
3223 if (results == NULL) {
3224 GSC_CREATE_BUFFER(outgroups,gsc_GroupNum, n);
3225 out = gsc_scaffold_split_by_someallocation(d, group_id, (void*) boxes,
3227 n, outgroups);
3228 GSC_DELETE_BUFFER(outgroups);
3229 } else {
3230 out = gsc_scaffold_split_by_someallocation(d, group_id, (void*) boxes,
3232 n, results);
3233 }
3234 GSC_DELETE_BUFFER(boxes);
3235 return out;
3236}
3237
3275 const gsc_GroupNum group_id,
3276 const size_t n,
3277 const GSC_GLOBALX_T* counts,
3278 gsc_GroupNum* results) {
3279 if (n <= 1) {
3280 fprintf(stderr, "Invalid n value: number of fractions into which to split group must be at least 2\n");
3281 return 0;
3282 }
3283
3284 GSC_CREATE_BUFFER(cumulative_counts,GSC_GLOBALX_T,n);
3285 cumulative_counts[n-1] = gsc_get_group_size(d, group_id);
3286 GSC_GLOBALX_T sum = 0;
3287 for (size_t j = 0; j < n - 1; ++j) {
3288 sum += counts[j];
3289 cumulative_counts[j] = sum;
3290 }
3291 if (cumulative_counts[n-2] > cumulative_counts[n-1]) {
3292 fprintf(stderr, "Provided capacities are larger than actual group: some buckets will not be filled\n");
3293 }
3294
3295 size_t gcount;
3296 if (results == NULL) {
3297 GSC_CREATE_BUFFER(outgroups,gsc_GroupNum,n);
3298 gcount = gsc_scaffold_split_by_someallocation(d, group_id, (void*) cumulative_counts,
3300 n, outgroups);
3301 GSC_DELETE_BUFFER(outgroups);
3302 } else {
3303 gcount = gsc_scaffold_split_by_someallocation(d, group_id, (void*) cumulative_counts,
3305 n, results);
3306 }
3307
3308 GSC_DELETE_BUFFER(cumulative_counts);
3309 return gcount;
3310}
3311
3334 const gsc_GroupNum group_id) {
3335 gsc_GroupNum outGroup = gsc_get_new_group_num(d);
3336 _Bool anyFound = 0;
3337
3340 while (GSC_IS_VALID_LOCATION(loc)) {
3341 anyFound = 1;
3342 if (rnd_pcg_range(&d->rng,0,1)) {
3343 gsc_set_group(loc,outGroup);
3344 }
3345 loc = gsc_next_forwards(&it);
3346 }
3348
3349 if (anyFound) {
3350 d->n_groups++;
3351 return outGroup;
3352 } else {
3353 return GSC_NO_GROUP;
3354 }
3355}
3356
3357
3359 gsc_SimData* d,
3360 void* datastore,
3361 size_t n_outgroups,
3362 size_t* subgroupsfound,
3363 gsc_GroupNum* outgroups) {
3364 // consideration: will be an issue in C version if n_outgroups > INT_MAX.
3365 size_t randgroup = rnd_pcg_range(&d->rng,0,n_outgroups-1);
3366 if (randgroup < *subgroupsfound) {
3367 return outgroups[randgroup];
3368 } else {
3369 (*subgroupsfound)++;
3370 return outgroups[*subgroupsfound-1];
3371 }
3372}
3373
3399 const gsc_GroupNum group_id,
3400 const size_t n,
3401 gsc_GroupNum* results) {
3402 if (n <= 1) {
3403 fprintf(stderr, "Invalid n value: number of fractions in which to split group must be at least 2\n");
3404 return 0;
3405 }
3406
3407 size_t gcount;
3408 if (results == NULL) {
3409 GSC_CREATE_BUFFER(outgroups,gsc_GroupNum,n);
3410 gcount = gsc_scaffold_split_by_someallocation(d, group_id, NULL,
3412 n, outgroups);
3413 GSC_DELETE_BUFFER(outgroups);
3414 } else {
3415 gcount = gsc_scaffold_split_by_someallocation(d, group_id, NULL,
3417 n, results);
3418 }
3419 return gcount;
3420}
3421
3422
3424 gsc_SimData* d,
3425 void* datastore,
3426 size_t n_outgroups,
3427 size_t* subgroupsfound,
3428 gsc_GroupNum* outgroups) {
3429 double* cumulative_probs = (double*) datastore;
3430 *subgroupsfound = n_outgroups;
3431 double randdraw = rnd_pcg_nextf(&d->rng);
3432 for (size_t j = 0; j < n_outgroups; ++j) {
3433 if (randdraw < cumulative_probs[j]) {
3434 return outgroups[j];
3435 }
3436 }
3437 // This should not happen if cumulative probs are valid
3438 return GSC_NO_GROUP;
3439}
3440
3474 const gsc_GroupNum group_id,
3475 const size_t n,
3476 const double* probs,
3477 gsc_GroupNum* results) {
3478 if (n <= 1) {
3479 fprintf(stderr, "Invalid n value: number of fractions in which to split group must be at least 2\n");
3480 return 0;
3481 }
3482
3483 // Check the probabilities
3484 GSC_CREATE_BUFFER(cumulative_probs,double,n);
3485 cumulative_probs[n-1] = 1.0;
3486 double sum = 0;
3487 for (size_t j = 0; j < n-1; ++j) {
3488 sum += probs[j];
3489 cumulative_probs[j] = sum;
3490 if (cumulative_probs[j] >= 1) {
3491 fprintf(stderr, "Provided probabilities add up to 1 or more: some buckets will not be filled\n");
3492 for (; j < n-1; ++j) {
3493 cumulative_probs[j] = 1;
3494 }
3495 //don't bother to calculate more
3496 break;
3497 }
3498 }
3499
3500 size_t gcount;
3501 if (results == NULL) {
3502 GSC_CREATE_BUFFER(outgroups,gsc_GroupNum,n);
3503 gcount = gsc_scaffold_split_by_someallocation(d, group_id, (void*) cumulative_probs,
3505 n, outgroups);
3506 GSC_DELETE_BUFFER(outgroups);
3507 } else {
3508 gcount = gsc_scaffold_split_by_someallocation(d, group_id, (void*) cumulative_probs,
3510 n, results);
3511 }
3512
3513 GSC_DELETE_BUFFER(cumulative_probs);
3514 return gcount;
3515}
3516
3517
3540 return gsc_get_existing_group_counts(d, output, NULL);
3541}
3542
3560 GSC_CREATE_BUFFER(buckets,GSC_GLOBALX_T,d->n_groups+1); // this also creates bucketscap, initalised to d->n_groups+1.
3561 memset(buckets,0,sizeof(GSC_GLOBALX_T)*bucketscap);
3562 size_t filledbuckets = 0;
3563
3566 while (GSC_IS_VALID_LOCATION(loc)) {
3567 gsc_GroupNum g = gsc_get_group(loc);
3568 // Unless all group numbers are consecutive starting at 1, the buckets array will need to be resized at some point.
3569 if (g.num >= bucketscap) {
3570 size_t oldcap = bucketscap;
3571 size_t newbucketcapacity = bucketscap;
3572 while (g.num >= newbucketcapacity) {
3573 newbucketcapacity *= 2;
3574 }
3575 GSC_STRETCH_BUFFER(buckets,newbucketcapacity);
3576 if (g.num >= bucketscap) {
3577 fprintf(stderr,"Memory allocation failed. Not all groups found\n");
3578 break;
3579 }
3580 memset(buckets+oldcap,0,sizeof(GSC_GLOBALX_T)*(bucketscap-oldcap));
3581
3582 }
3583
3584 buckets[g.num] += 1;
3585 if (buckets[g.num] == 1) {
3586 ++filledbuckets;
3587 }
3588
3589 loc = gsc_next_forwards(&it);
3590 }
3591
3592 // Now save to output and sort.
3593 size_t capacity = filledbuckets;
3594 if (capacity > d->n_groups) {
3595 fprintf(stderr,"Found more groups than expected - gsc_SimData.n_groups is outdated somewhere."
3596 " Trimming output of get_existing_group_ to avoid a crash: not all groups may be shown\n");
3597 capacity = d->n_groups;
3598 }
3599 size_t g_index = 0;
3600 for (size_t i = 1; i < bucketscap; ++i) {
3601 if (buckets[i]) {
3602 /*if (g_index >= capacity) {
3603 fprintf(stderr,"Found more groups than just a moment ago.");
3604 --g_index;
3605 break;
3606 }*/
3607
3608 if (out_groups != NULL) {
3609 out_groups[g_index] = (gsc_GroupNum){.num=i};
3610 }
3611 if (out_sizes != NULL) {
3612 out_sizes[g_index] = buckets[i];
3613 }
3614 ++g_index;
3615 }
3616 }
3617
3618 //qsort(*out_groups, g_index, sizeof(gsc_GroupNum), gsc_helper_ascending_gsc_GroupNum_comparer);
3619 /*for (int i = 0; i < g_index; ++i) {
3620 (*out_sizes)[i] = buckets[(*out_groups)[i].num];
3621 }*/
3622 GSC_DELETE_BUFFER(buckets);
3623 d->n_groups = g_index;
3624
3625 return g_index;
3626}
3627
3628
3652gsc_GroupNum gsc_get_next_free_group_num(const size_t n_existing_groups,
3653 const gsc_GroupNum* existing_groups,
3654 size_t* cursor,
3655 gsc_GroupNum previous) {
3656 if (existing_groups == NULL) return GSC_NO_GROUP;
3657
3658 gsc_GroupNum nextgroup = (gsc_GroupNum){.num=previous.num+1};
3659 // a check here in case previous seems invalid. We need previous so we don't get stuck in a loop
3660 // of giving the same next 'free' number, but we know what a lower bound on its number should be
3661 // based on where the cursor is.
3662 if (*cursor > 0 && nextgroup.num <= existing_groups[(*cursor) - 1].num) {
3663 nextgroup.num = existing_groups[(*cursor) - 1].num + 1;
3664 }
3665
3666 while (*cursor < n_existing_groups) {
3667 if (nextgroup.num < existing_groups[*cursor].num) {
3668 break;
3669 }
3670
3671 ++(*cursor);
3672 ++nextgroup.num;
3673 }
3674 return nextgroup;
3675}
3676
3691 // Make sure we get all existing groups
3692 if (d->m == NULL || (d->m->n_genotypes == 0 && d->m->next == NULL)) {
3693 return (gsc_GroupNum){.num=1};
3694 }
3695
3696 size_t n_groups = (d->n_groups > 0) ? d->n_groups : 5;
3697 GSC_CREATE_BUFFER(existing_groups,gsc_GroupNum,n_groups);
3698 n_groups = gsc_get_existing_groups(d, existing_groups);
3699
3700 size_t i = 0;
3701 GSC_ID_T gn = 1;
3702
3703 while (i < n_groups) {
3704 if (gn < existing_groups[i].num) {
3705 break;
3706 }
3707
3708 ++i;
3709 ++gn;
3710 }
3711 GSC_DELETE_BUFFER(existing_groups);
3712 return (gsc_GroupNum){.num=gn};
3713}
3714
3715
3725 const size_t n,
3726 gsc_GroupNum* result) {
3727 // Make sure we get all existing groups
3728 size_t n_groups;
3729 GSC_CREATE_BUFFER(existing_groups,gsc_GroupNum,d->n_groups);
3730 n_groups = gsc_get_existing_groups(d, existing_groups);
3731
3732 size_t existingi = 0;
3733 GSC_ID_T gn = 0;
3734
3735 // i: current index of `results` (the array of currently empty group numbers)
3736 // gn: group number being checked against existing_groups. if not in there is added to
3737 // the list of results
3738 // existingi: current index of existing_groups
3739 for (size_t i = 0; i < n; ++i) {
3740 ++gn;
3741 while (existingi < n_groups) {
3742 if (gn < existing_groups[existingi].num) {
3743 break;
3744 }
3745
3746 ++existingi;
3747 ++gn;
3748 }
3749 result[i] = (gsc_GroupNum){.num=gn};
3750 }
3751 GSC_DELETE_BUFFER(existing_groups);
3752}
3753
3762 // label_ids must be in sequential order
3763 gsc_LabelID new = {.id=1};
3764 GSC_ID_T i = 0;
3765
3766 while (i < d->n_labels) {
3767 if (new.id < d->label_ids[i].id) {
3768 break;
3769 }
3770
3771 ++i;
3772 ++(new.id);
3773 }
3774
3775 return new;
3776}
3777
3786 // label_ids must be in sequential order
3787 gsc_EffectID new = { .id=1 };
3788 GSC_ID_T i = 0;
3789
3790 while (i < d->n_eff_sets) {
3791 if (new.id < d->eff_set_ids[i].id) {
3792 break;
3793 }
3794
3795 ++i;
3796 ++(new.id);
3797 }
3798
3799 return new;
3800}
3801
3810 // map IDs must be in sequential order
3811 gsc_MapID new = { .id=1 };
3812 GSC_ID_T i = 0;
3813
3814 while (i < d->genome.n_maps) {
3815 if (new.id < d->genome.map_ids[i].id) {
3816 break;
3817 }
3818
3819 ++i;
3820 ++(new.id);
3821 }
3822
3823 return new;
3824}
3825
3826
// Body of the label-index lookup: binary search of d->label_ids for the entry
// whose .id equals label.id. Returns that entry's index, or GSC_NA_IDX when
// there are no labels or no entry matches. The search is only correct if
// d->label_ids is sorted by ascending id.
// (The signature line is missing from this listing.)
//
// NOTE(review): `last` starts at d->n_labels rather than d->n_labels - 1.
// When label.id is larger than every stored id, `first` climbs to d->n_labels
// and the next probe reads d->label_ids[d->n_labels], one element past the
// last valid entry.
// NOTE(review): `last = mid - 1` executes with mid == 0 when label.id is
// smaller than every stored id. If GSC_ID_T is unsigned this wraps to a huge
// value and the loop keeps probing out of range - confirm GSC_ID_T's
// signedness.
3836 if (d->n_labels == 0) { return GSC_NA_IDX; } // nothing to search
3837 if (d->n_labels == 1) { return (d->label_ids[0].id == label.id) ? 0 : GSC_NA_IDX ; }
3838
3839 // If there are at least two labels then we binary search.
3840 GSC_ID_T first = 0;
3841 GSC_ID_T last = d->n_labels;
3842 GSC_ID_T mid;
3843
3844 while (first <= last) {
3845 mid = (first + last) / 2;
3846
3847 if (d->label_ids[mid].id == label.id) {
3848 return mid;
3849 } else if (d->label_ids[mid].id < label.id) {
3850 first = mid + 1;
3851 } else {
3852 last = mid - 1;
3853 }
3854
3855 }
3856
3857 return GSC_NA_IDX;
3858}
3859
// Body of the effect-set-index lookup: binary search of d->eff_set_ids for
// the entry whose .id equals eff_set_id.id. Returns that entry's index, or
// GSC_NA_IDX when there are no effect sets or no entry matches. The search is
// only correct if d->eff_set_ids is sorted by ascending id.
// (The signature line is missing from this listing.)
//
// NOTE(review): `last` starts at d->n_eff_sets rather than d->n_eff_sets - 1.
// When eff_set_id.id is larger than every stored id, `first` climbs to
// d->n_eff_sets and the next probe reads d->eff_set_ids[d->n_eff_sets], one
// element past the last valid entry.
// NOTE(review): `last = mid - 1` executes with mid == 0 when eff_set_id.id is
// smaller than every stored id. If GSC_ID_T is unsigned this wraps to a huge
// value and the loop keeps probing out of range - confirm GSC_ID_T's
// signedness.
3868 if (d->n_eff_sets == 0) { return GSC_NA_IDX; } // nothing to search
3869 if (d->n_eff_sets == 1) { return (d->eff_set_ids[0].id == eff_set_id.id) ? 0 : GSC_NA_IDX ; }
3870
3871 // If there are at least two effect sets then we binary search.
3872 GSC_ID_T first = 0;
3873 GSC_ID_T last = d->n_eff_sets;
3874 GSC_ID_T mid;
3875
3876 while (first <= last) {
3877 mid = (first + last) / 2;
3878
3879 if (d->eff_set_ids[mid].id == eff_set_id.id) {
3880 return mid;
3881 } else if (d->eff_set_ids[mid].id < eff_set_id.id) {
3882 first = mid + 1;
3883 } else {
3884 last = mid - 1;
3885 }
3886
3887 }
3888
3889 return GSC_NA_IDX;
3890}
3891
// Body of the map-index lookup: binary search of d->genome.map_ids for the
// entry whose .id equals map.id. Returns that entry's index, or GSC_NA_IDX
// when there are no maps or no entry matches. The search is only correct if
// d->genome.map_ids is sorted by ascending id.
// (The signature line is missing from this listing.)
//
// NOTE(review): `last` starts at d->genome.n_maps rather than n_maps - 1.
// When map.id is larger than every stored id, `first` climbs to n_maps and
// the next probe reads d->genome.map_ids[n_maps], one element past the last
// valid entry.
// NOTE(review): `last = mid - 1` executes with mid == 0 when map.id is
// smaller than every stored id. If GSC_ID_T is unsigned this wraps to a huge
// value and the loop keeps probing out of range - confirm GSC_ID_T's
// signedness.
3900 if (d->genome.n_maps == 0) { return GSC_NA_IDX; } // nothing to search
3901 if (d->genome.n_maps == 1) { return (d->genome.map_ids[0].id == map.id) ? 0 : GSC_NA_IDX ; }
3902
3903 // If there are at least two maps then we binary search.
3904 GSC_ID_T first = 0;
3905 GSC_ID_T last = d->genome.n_maps;
3906 GSC_ID_T mid;
3907
3908 while (first <= last) {
3909 mid = (first + last) / 2;
3910
3911 if (d->genome.map_ids[mid].id == map.id) {
3912 return mid;
3913 } else if (d->genome.map_ids[mid].id < map.id) {
3914 first = mid + 1;
3915 } else {
3916 last = mid - 1;
3917 }
3918
3919 }
3920
3921 return GSC_NA_IDX;
3922}
3923
3924//-----------------------------------Data Access-----------------------------------
3925
3941 if (group_id.num == GSC_NO_GROUP.num) {
3942 return 0; // it is not a group so it does not have a size
3943 }
3944 const gsc_AlleleMatrix* m = d->m;
3945 GSC_GLOBALX_T size = 0;
3946 while (1) {
3947 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
3948 if (m->groups[i].num == group_id.num) {
3949 ++size;
3950 }
3951 }
3952
3953 if (m->next == NULL) {
3954 return size;
3955 } else {
3956 m = m->next;
3957 }
3958 }
3959}
3960
3979 const gsc_GroupNum group_id,
3980 GSC_GLOBALX_T group_size,
3981 char** output) {
3982 const gsc_AlleleMatrix* m = d->m;
3983 GSC_GLOBALX_T outix = 0;
3984 while (1) {
3985 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
3986 if (m->groups[i].num == group_id.num) {
3987 output[outix] = m->alleles[i];
3988 ++outix;
3989 if (outix == group_size) {
3990 return outix;
3991 }
3992 }
3993 }
3994
3995 if (m->next == NULL) {
3996 return outix;
3997 } else {
3998 m = m->next;
3999 }
4000 }
4001}
4002
4020 const gsc_GroupNum group_id,
4021 GSC_GLOBALX_T group_size,
4022 char** output) {
4023 const gsc_AlleleMatrix* m = d->m;
4024 GSC_GLOBALX_T outix = 0;
4025 while (1) {
4026 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
4027 if (m->groups[i].num == group_id.num) {
4028 output[outix] = m->names[i];
4029 ++outix;
4030 if (outix == group_size) {
4031 return outix;
4032 }
4033 }
4034 }
4035
4036 if (m->next == NULL) {
4037 return outix;
4038 } else {
4039 m = m->next;
4040 }
4041 }
4042}
4043
4061 const gsc_GroupNum group_id,
4062 GSC_GLOBALX_T group_size,
4063 gsc_PedigreeID *output) {
4064 const gsc_AlleleMatrix* m = d->m;
4065 GSC_GLOBALX_T outix = 0;
4066 while (1) {
4067 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
4068 if (m->groups[i].num == group_id.num) {
4069 output[outix] = m->ids[i];
4070 ++outix;
4071 if (outix == group_size) {
4072 return outix;
4073 }
4074 }
4075 }
4076
4077 if (m->next == NULL) {
4078 return outix;
4079 } else {
4080 m = m->next;
4081 }
4082 }
4083}
4084
4102 const gsc_GroupNum group_id,
4103 GSC_GLOBALX_T group_size,
4104 GSC_GLOBALX_T* output) {
4105 const gsc_AlleleMatrix* m = d->m;
4106 GSC_GLOBALX_T total_i = 0, outix = 0;
4107 while (1) {
4108 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i, ++total_i) {
4109 if (m->groups[i].num == group_id.num) {
4110 output[outix] = total_i;
4111 ++outix;
4112 if (outix == group_size) {
4113 return outix;
4114 }
4115 }
4116 }
4117
4118 if (m->next == NULL) {
4119 return outix;
4120 } else {
4121 m = m->next;
4122 }
4123 }
4124}
4125
4144 const gsc_GroupNum group_id,
4145 const gsc_EffectID effID,
4146 GSC_GLOBALX_T group_size,
4147 double* output) {
4148 gsc_DecimalMatrix dm_bvs = gsc_calculate_bvs(d, group_id, effID );
4149
4150 for (size_t i = 0; i < dm_bvs.cols; ++i) {
4151 output[i] = dm_bvs.matrix[0][i];
4152 if (i + 1 == group_size) {
4153 break;
4154 }
4155 }
4156
4157 gsc_delete_dmatrix(&dm_bvs);
4158
4159 return group_size;
4160}
4161
4182 const gsc_GroupNum group_id,
4183 GSC_GLOBALX_T group_size,
4184 const int whichParent,
4185 gsc_PedigreeID* output) {
4186 if (!(whichParent == 1 || whichParent == 2)) {
4187 fprintf(stderr, "Value error: `parent` must be 1 or 2.");
4188 return GSC_NA_GLOBALX;
4189 }
4190 int parent = whichParent - 1;
4191
4192 const gsc_AlleleMatrix* m = d->m;
4193 GSC_GLOBALX_T outix = 0;
4194 while (1) {
4195 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
4196 if (m->groups[i].num == group_id.num) {
4197 output[outix] = m->pedigrees[parent][i];
4198 ++outix;
4199 if (outix == group_size) {
4200 return outix;
4201 }
4202 }
4203 }
4204
4205 if (m->next == NULL) {
4206 return outix;
4207 } else {
4208 m = m->next;
4209 }
4210 }
4211}
4212
4233 const gsc_GroupNum group_id,
4234 GSC_GLOBALX_T group_size,
4235 const int whichParent,
4236 char** output) {
4237 if (!(whichParent == 1 || whichParent == 2)) {
4238 fprintf(stderr, "Value error: `parent` must be 1 or 2.");
4239 return GSC_NA_GLOBALX;
4240 }
4241 int parent = whichParent - 1;
4242
4243 const gsc_AlleleMatrix* m = d->m;
4244 GSC_GLOBALX_T outix = 0;
4245 while (1) {
4246 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
4247 if (m->groups[i].num == group_id.num) {
4248 if (m->pedigrees[parent][i].id != GSC_NO_PEDIGREE.id) {
4249 output[outix] = gsc_get_name_of_id(d->m, m->pedigrees[parent][i]);
4250 } else {
4251 output[outix] = NULL;
4252 }
4253 ++outix;
4254 if (outix == group_size) {
4255 return outix;
4256 }
4257 }
4258 }
4259
4260 if (m->next == NULL) {
4261 return outix;
4262 } else {
4263 m = m->next;
4264 }
4265 }
4266}
4267
4298 const gsc_GroupNum group_id,
4299 GSC_GLOBALX_T group_size,
4300 char** output) {
// Collects one pedigree string per group member by round-tripping through a
// file: gsc_save_pedigrees() writes the group to "gS_gpptmp", then the file is
// read back one line at a time. Each line is stored, without its newline and
// NUL-terminated, in a buffer allocated here with gsc_malloc_wrap and placed
// in output[i]; nothing in this function frees those buffers.
// If group_size is 0 or GSC_NA_GLOBALX it is recomputed with
// gsc_get_group_size(). Returns group_size on the normal path,
// GSC_NA_GLOBALX if the file cannot be reopened, and 0 for an empty group.
// (The first line of the signature is missing from this listing.)
//
// NOTE(review): the file name is fixed and relative to the working directory,
// so two concurrent callers would overwrite each other's file.
4301 char* fname = "gS_gpptmp";
4302 gsc_save_pedigrees(fname,d,group_id,GSC_TRUE);
4303
4304 FILE* fp2;
4305 if ((fp2 = fopen(fname, "r")) == NULL) {
4306 fprintf(stderr, "Failed to use temporary file\n");
4307 return GSC_NA_GLOBALX;
4308 }
4309
4310 // Work out how many lines to read if the caller did not say
4311 if (group_size == 0 || group_size == GSC_NA_GLOBALX) {
4312 group_size = gsc_get_group_size( d, group_id );
// NOTE(review): this early return skips the fclose(fp2) and remove(fname)
// at the bottom of the function, leaking the handle and leaving the file
// on disk.
4313 if (group_size == 0) { return 0; }
4314 }
4315
4316 // read one line at a time
4317 //size_t n;
4318 //int line_len;
4319 unsigned int size;
4320 unsigned int index;
4321 int nextc;
4322 for (GSC_GLOBALX_T i = 0; i < group_size; ++i) {
4323 // getline is not available in MinGW it looks like (AUG 2021)
4324 /*gp_ped[i] = NULL;
4325 if ((line_len = getline(&(gp_ped[i]), &n, fp2)) == -1) {
4326 error("Failed to get %d th pedigree\n", i);
4327 }
4328 // remove the newline character
4329 if (gp_ped[i][line_len - 1] == '\n') {
4330 gp_ped[i][line_len - 1] = '\0';
4331 }*/
4332
4333 // a not-very-size-efficient, fgetc-based line getter: start with a
// 50-byte buffer and double it each time it fills up
4334 size = 50;
4335 index = 0;
4336 output[i] = gsc_malloc_wrap(sizeof(char) * size,GSC_TRUE);
4337 while ((nextc = fgetc(fp2)) != '\n' && nextc != EOF) {
4338 output[i][index] = nextc;
4339 ++index;
4340
// growing when index == size keeps one byte free for the terminator
4341 if (index >= size) {
4342 size *= 2;
// NOTE(review): the buffer came from gsc_malloc_wrap (GSC_MALLOC) and is
// released with GSC_FREE, but is grown with plain realloc - only safe if
// those macros map onto the same allocator; confirm.
4343 char* temp = realloc(output[i], sizeof(char) * size);
4344 if (temp == NULL) {
4345 GSC_FREE(output[i]);
4346 fprintf(stderr, "Memory allocation of size %u failed.\n", size);
// NOTE(review): output[i] becomes NULL here but the loop is not left, so
// the next character read (or the terminator write after the loop)
// dereferences a NULL pointer.
4347 output[i] = NULL;
4348 } else {
4349 output[i] = temp;
4350 }
4351 }
4352 }
4353 output[i][index] = '\0';
4354 }
4355
4356 fclose(fp2);
4357 remove(fname);
4358
4359 return group_size;
4360}
4361
4362/*---------------------- matrix-operations.c dregs -------------------*/
4363
4371gsc_DecimalMatrix gsc_generate_zero_dmatrix(const size_t r, const size_t c) {
4372 gsc_DecimalMatrix zeros;
4373 zeros.rows = r;
4374 zeros.cols = c;
4375
4376 zeros.matrix = gsc_malloc_wrap(sizeof(*zeros.matrix) * r,GSC_TRUE);
4377 for (size_t i = 0; i < r; ++i) {
4378 zeros.matrix[i] = gsc_malloc_wrap(sizeof(*(zeros.matrix[i])) * c,GSC_TRUE);
4379 for (size_t j = 0; j < c; ++j) {
4380 zeros.matrix[i][j] = 0.0;
4381 }
4382 }
4383 return zeros;
4384}
4385
4402 const gsc_DecimalMatrix* a,
4403 const double* b) {
4404 /*if (a->cols != b->rows) {
4405 fprintf(stderr, "Dimensions invalid for matrix multiplication."); exit(1);
4406 }*/
4407
4408 //if (result->rows != a->rows || result->cols != b->cols) {
4409 if (result->cols != a->rows) { //these dimensions make it so result is one heap array, using only first row.
4410 fprintf(stderr, "Dimensions invalid for adding to result: %lu does not fit in %lu\n",
4411 (long unsigned int) a->rows, (long unsigned int) result->cols);
4412 return 1;
4413 }
4414
4415 double cell;
4416
4417 for (size_t i = 0; i < result->cols; ++i) {
4418 cell = 0;
4419 for (size_t j = 0; j < a->cols; ++j) {
4420 // for each cell, we loop through each of the pairs adjacent to it.
4421 cell += (a->matrix[i][j]) * b[j];
4422 }
4423
4424 result->matrix[0][i] += cell;
4425 }
4426
4427 return 0;
4428
4429}
4430
4455 const gsc_DecimalMatrix* amat,
4456 const double* avec,
4457 const gsc_DecimalMatrix* bmat,
4458 const double* bvec) {
4459
4460 if (result->cols != amat->rows) { //these dimensions make it so result is one heap array, using only first row.
4461 fprintf(stderr, "Dimensions invalid for adding to result: %lu does not fit in %lu\n",
4462 (long unsigned int) amat->rows, (long unsigned int) result->cols);
4463 return 1;
4464 }
4465 if (result->cols != bmat->rows) {
4466 fprintf(stderr, "Dimensions invalid for adding to result: %lu does not fit in %lu\n",
4467 (long unsigned int) bmat->rows, (long unsigned int) result->cols);
4468 return 1;
4469 }
4470 if (amat->cols != bmat->cols) {
4471 fprintf(stderr, "Dimensions of the two products are uneven: length %lu does not match length %lu\n",
4472 (long unsigned int) amat->cols, (long unsigned int) bmat->cols);
4473 return 1;
4474 }
4475
4476 double cell;
4477
4478 for (size_t i = 0; i < result->cols; ++i) {
4479 cell = 0;
4480 for (size_t j = 0; j < amat->cols; ++j) {
4481 // for each cell, we loop through each of the pairs adjacent to it.
4482 cell += (amat->matrix[i][j]) * avec[j];
4483 cell += (bmat->matrix[i][j]) * bvec[j];
4484 }
4485
4486 result->matrix[0][i] += cell;
4487 }
4488
4489 return 0;
4490}
4491
4492
4499 if (m->matrix != NULL) {
4500 for (size_t i = 0; i < m->rows; i++) {
4501 if (m->matrix[i] != NULL) {
4502 GSC_FREE(m->matrix[i]);
4503 }
4504 }
4505 GSC_FREE(m->matrix);
4506 m->matrix = NULL;
4507 }
4508 m->cols = 0;
4509 m->rows = 0;
4510}
4511
4512
4513/*--------------------------------Deleting-----------------------------------*/
4514
4529 gsc_AlleleMatrix* m = d->m;
4530 GSC_GLOBALX_T total_deleted = 0;
4531 while (1) {
4532 GSC_LOCALX_T deleted = 0;
4533 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
4534 if (m->groups[i].num == group_id.num) {
4535 // delete data
4536 if (m->names[i] != NULL) {
4537 GSC_FREE(m->names[i]);
4538 m->names[i] = NULL;
4539 }
4540 if (m->alleles[i] != NULL) {
4541 GSC_FREE(m->alleles[i]);
4542 m->alleles[i] = NULL;
4543 }
4544 m->ids[i] = GSC_NO_PEDIGREE;
4545 m->pedigrees[0][i] = GSC_NO_PEDIGREE;
4546 m->pedigrees[1][i] = GSC_NO_PEDIGREE;
4547 m->groups[i] = GSC_NO_GROUP;
4548 ++deleted;
4549 }
4550 }
4551 m->n_genotypes -= deleted;
4552 total_deleted += deleted;
4553
4554 if (m->next == NULL) {
4556 printf("%lu genotypes were deleted\n", (long unsigned int) total_deleted);
4557 d->n_groups--;
4558 return;
4559 } else {
4560 m = m->next;
4561 }
4562 }
4563}
4564
// Body of the effect-set deleter: looks up the index of effID, then removes
// that entry from both d->e and d->eff_set_ids. With a single effect set the
// arrays are dropped entirely; otherwise each array is copied into a freshly
// allocated array one element shorter, falling back to shifting in place when
// that allocation fails.
// (The signature and several interior lines - including the declaration of
// newIDs - are missing from this listing, so only the visible statements are
// described.)
4573 GSC_ID_T which_ix = gsc_get_index_of_eff_set(d, effID);
// NOTE(review): the lookup reports "not found" as GSC_NA_IDX, but the check
// below compares against GSC_NA_LOCALX. If those two sentinels differ in
// value or width, a missing effect set is not detected - confirm.
4574 if (which_ix == GSC_NA_LOCALX) {
4575 fprintf(stderr, "Nonexistent effect set %lu\n", (long unsigned int) effID.id);
4576 return;
4577 }
4578
4579 if (d->n_eff_sets == 1) {
4581 d->n_eff_sets = 0;
4582 GSC_FREE(d->e);
4584 d->e = NULL;
4585 d->eff_set_ids = NULL;
4586 } else {
// from here on n_eff_sets is the NEW count; the arrays still hold
// n_eff_sets + 1 entries
4587 d->n_eff_sets--;
4588
4589 gsc_delete_effect_matrix(d->e + which_ix);
4590 gsc_EffectMatrix* newE = gsc_malloc_wrap(sizeof(*d->e)*d->n_eff_sets,GSC_FALSE);
4591 if (newE == NULL) {
// Allocation failed: close the gap inside the existing array instead.
// NOTE(review): with n_eff_sets already decremented, the bound
// `i < d->n_eff_sets-1` stops one element early - the old last entry is
// never moved down, and the assignment after the loop then overwrites it
// with the cleared entry. The bound looks like it should be
// `i < d->n_eff_sets`.
4592 gsc_EffectMatrix cleared = d->e[which_ix];
4593 for (GSC_ID_T i = which_ix; i < d->n_eff_sets-1; ++i) {
4594 d->e[i] = d->e[i+1];
4595 }
4596 d->e[d->n_eff_sets] = cleared;
4597 } else {
// copy the entries before and after which_ix into the shorter array
4598 memcpy(newE, d->e, sizeof(*d->e)*which_ix);
4599 memcpy(newE + which_ix, d->e + which_ix + 1, sizeof(*d->e)*(d->n_eff_sets - which_ix));
4600 GSC_FREE(d->e);
4601 d->e = newE;
4602 }
4603
4605 if (newIDs == NULL) {
// NOTE(review): same early-stopping loop bound as the d->e fallback above.
4606 for (GSC_ID_T i = which_ix; i < d->n_eff_sets-1; ++i) {
4607 d->eff_set_ids[i] = d->eff_set_ids[i+1];
4608 }
4610 } else {
4611 memcpy(newIDs, d->eff_set_ids, sizeof(*d->eff_set_ids)*which_ix);
4612 memcpy(newIDs + which_ix, d->eff_set_ids + which_ix + 1, sizeof(*d->eff_set_ids)*(d->n_eff_sets - which_ix));
4614 d->eff_set_ids = newIDs;
4615 }
4616 }
4617}
4618
4626void gsc_delete_label(gsc_SimData* d, const gsc_LabelID which_label) {
4627 GSC_ID_T label_ix;
4628 if (which_label.id == GSC_NO_LABEL.id ||
4629 (label_ix = gsc_get_index_of_label(d, which_label)) == GSC_NA_LOCALX) {
4630 fprintf(stderr, "Nonexistent label %lu\n", (long unsigned int)which_label.id);
4631 return;
4632 }
4633
4634 if (d->n_labels == 1) {
4635 // Delete 'em all
4636 d->n_labels = 0;
4637 GSC_FREE(d->label_ids);
4638 d->label_ids = NULL;
4640 d->label_defaults = NULL;
4641
4642 gsc_AlleleMatrix* m = d->m;
4643 do {
4644
4645 GSC_FREE(m->labels[0]);
4646 GSC_FREE(m->labels);
4647 m->labels = NULL;
4648
4649 } while ((m = m->next) != NULL);
4650
4651 } else {
4652 // Reduce the list of labels in the gsc_SimData
4653 gsc_LabelID* new_label_ids = gsc_malloc_wrap(sizeof(gsc_LabelID) * (d->n_labels - 1),GSC_FALSE);
4654 if (new_label_ids == NULL) {
4655 for (GSC_ID_T i = label_ix; i < d->n_labels - 1; ++i) {
4656 d->label_ids[i] = d->label_ids[i+1];
4657 }
4659 } else {
4660 memcpy(new_label_ids,d->label_ids,sizeof(*d->label_ids)*label_ix);
4661 memcpy(new_label_ids + label_ix,d->label_ids + label_ix + 1, sizeof(*d->label_ids)*(d->n_labels - 1 - label_ix));
4662 GSC_FREE(d->label_ids);
4663 d->label_ids = new_label_ids;
4664 }
4665
4666 int* new_label_defaults = gsc_malloc_wrap(sizeof(int) * (d->n_labels - 1),GSC_FALSE);
4667 if (new_label_defaults == NULL) {
4668 for (GSC_ID_T i = label_ix; i < d->n_labels - 1; ++i) {
4669 d->label_defaults[i] = d->label_defaults[i+1];
4670 }
4671 // no need to overwrite default
4672 } else {
4673 memcpy(new_label_defaults,d->label_defaults,sizeof(*d->label_defaults)*label_ix);
4674 memcpy(new_label_defaults + label_ix,d->label_defaults + label_ix + 1, sizeof(*d->label_defaults)*(d->n_labels - 1 - label_ix));
4676 d->label_defaults = new_label_defaults;
4677 }
4678 d->n_labels --;
4679
4680 // Remove the label from the gsc_AlleleMatrix linked list
4681 gsc_AlleleMatrix* m = d->m;
4682 do {
4683 GSC_FREE(m->labels[label_ix]);
4684
4685 m->n_labels = d->n_labels;
4686 int** new_label_lookups = gsc_malloc_wrap(sizeof(int*) * (m->n_labels),GSC_FALSE);
4687 if (new_label_lookups == NULL) {
4688 for (GSC_ID_T i = label_ix; i < m->n_labels; ++i) {
4689 m->labels[i] = m->labels[i+1];
4690 }
4691 m->labels[m->n_labels + 1] = NULL;
4692 } else {
4693 memcpy(new_label_lookups, m->labels, sizeof(*m->labels)*label_ix);
4694 memcpy(new_label_lookups + label_ix, m->labels + label_ix + 1, sizeof(*m->labels)*(m->n_labels - label_ix));
4695 GSC_FREE(m->labels);
4696 m->labels = new_label_lookups;
4697 }
4698 } while ((m = m->next) != NULL);
4699 }
4700}
4701
4709 if (g->marker_names != NULL) {
4710 for (GSC_GENOLEN_T i = 0; i < g->n_markers; i++) {
4711 if (g->marker_names[i] != NULL) {
4712 GSC_FREE(g->marker_names[i]);
4713 }
4714 }
4716 g->marker_names = NULL;
4717 }
4718 if (g->names_alphabetical != NULL) {
4720 g->names_alphabetical = NULL;
4721 }
4722 g->n_markers = 0;
4723
4724 if (g->map_ids != NULL) {
4725 GSC_FREE(g->map_ids);
4726 g->map_ids = NULL;
4727 }
4728
4729 if (g->maps != NULL) {
4730 for (GSC_ID_T i = 0; i < g->n_maps; ++i) {
4732 }
4733 GSC_FREE(g->maps);
4734 g->maps = NULL;
4735 }
4736 g->n_maps = 0;
4737}
4738
// Body of the recombination-map deleter: looks up the index of which_map,
// then removes that entry from both d->genome.maps and d->genome.map_ids.
// With a single map the arrays are dropped entirely; otherwise each array is
// copied into a freshly allocated array one element shorter, falling back to
// shifting in place when that allocation fails.
// (The signature and several interior lines - including the declaration of
// tmplist - are missing from this listing, so only the visible statements
// are described.)
4747 GSC_ID_T map_ix;
// NOTE(review): a map ID is compared against the *label* sentinel
// GSC_NO_LABEL here; presumably a map-specific sentinel was intended -
// confirm.
4748 if (which_map.id == GSC_NO_LABEL.id || (map_ix = gsc_get_index_of_map(d, which_map)) == GSC_NA_IDX) {
4749 fprintf(stderr, "Nonexistent recombination map %lu\n", (long unsigned int) which_map.id);
4750 return;
4751 }
4752
4753 if (d->genome.n_maps == 1) {
4755 d->genome.map_ids = NULL;
4757 GSC_FREE(d->genome.maps);
4758 d->genome.maps = NULL;
4759 d->genome.n_maps = 0;
4760 } else {
// from here on n_maps is the NEW count; the arrays still hold n_maps + 1
// entries
4761 d->genome.n_maps--;
4764 if (tmplist == NULL) {
// Allocation failed: close the gap inside the existing array instead.
// NOTE(review): with n_maps already decremented, the bound
// `i < d->genome.n_maps - 1` stops one element early - the old last map is
// never moved down, and the assignment after the loop then overwrites it
// with the cleared map. The bound looks like it should be
// `i < d->genome.n_maps`.
4765 gsc_RecombinationMap clearedmap = d->genome.maps[map_ix];
4766 for (GSC_ID_T i = map_ix; i < d->genome.n_maps - 1; ++i) {
4767 d->genome.maps[i] = d->genome.maps[i+1];
4768 }
4769 d->genome.maps[d->genome.n_maps] = clearedmap;
4770 } else {
// copy the entries before and after map_ix into the shorter array
4771 memcpy(tmplist, d->genome.maps, sizeof(*d->genome.maps)*map_ix);
4772 memcpy(tmplist + map_ix, d->genome.maps + map_ix + 1, sizeof(*d->genome.maps)*(d->genome.n_maps - map_ix));
4773 GSC_FREE(d->genome.maps);
4774 d->genome.maps = tmplist;
4775 }
4776
4777 gsc_MapID* tmpids = gsc_malloc_wrap(sizeof(*d->genome.map_ids)*d->genome.n_maps, GSC_FALSE);
4778 if (tmpids == NULL) {
// NOTE(review): same early-stopping loop bound as the maps fallback above.
4779 for (GSC_ID_T i = map_ix; i < d->genome.n_maps - 1; ++i) {
4780 d->genome.map_ids[i] = d->genome.map_ids[i+1];
4781 }
4783 } else {
4784 memcpy(tmpids, d->genome.map_ids, sizeof(*d->genome.map_ids)*map_ix);
4785 memcpy(tmpids + map_ix, d->genome.map_ids + map_ix + 1, sizeof(*d->genome.map_ids)*(d->genome.n_maps - map_ix));
4787 d->genome.map_ids = tmpids;
4788 }
4789 }
4790}
4791
4802 if (m->chrs != NULL) {
4803 for (GSC_GENOLEN_T i = 0; i < m->n_chr; ++i) {
4804 switch (m->chrs[i].type) {
4805 case GSC_LINKAGEGROUP_SIMPLE:
4808 m->chrs[i].map.simple.n_markers = 0;
4809 if (m->chrs[i].map.simple.dists != NULL) {
4810 GSC_FREE(m->chrs[i].map.simple.dists);
4811 m->chrs[i].map.simple.dists = NULL;
4812 }
4813 break;
4814 case GSC_LINKAGEGROUP_REORDER:
4816 m->chrs[i].map.reorder.n_markers = 0;
4817 if (m->chrs[i].map.reorder.dists != NULL) {
4818 GSC_FREE(m->chrs[i].map.reorder.dists);
4819 m->chrs[i].map.reorder.dists = NULL;
4820 }
4821 if (m->chrs[i].map.reorder.marker_indexes != NULL) {
4823 m->chrs[i].map.reorder.marker_indexes = NULL;
4824 }
4825 break;
4826 }
4827 }
4828 GSC_FREE(m->chrs);
4829 m->chrs = NULL;
4830 }
4831}
4832
4842 if (m == NULL) {
4843 return;
4844 }
4845 gsc_AlleleMatrix* next;
4846 do {
4847 /* free the big data matrix */
4848 for (GSC_LOCALX_T i = 0; i < CONTIG_WIDTH; i++) {
4849 if (m->alleles[i] != NULL) {
4850 GSC_FREE(m->alleles[i]);
4851 }
4852
4853 }
4854
4855 // free names
4856 for (GSC_LOCALX_T i = 0; i < CONTIG_WIDTH; i++) {
4857 if (m->names[i] != NULL) {
4858 GSC_FREE(m->names[i]);
4859 }
4860 }
4861
4862 // free labels
4863 for (GSC_ID_T i = 0; i < m->n_labels; ++i) {
4864 if (m->labels[i] != NULL) {
4865 GSC_FREE(m->labels[i]);
4866 }
4867 }
4868 if (m->labels != NULL) {
4869 GSC_FREE(m->labels);
4870 }
4871
4872 next = m->next;
4873 GSC_FREE(m);
4874 } while ((m = next) != NULL);
4875}
4876
4886 if (m->effect_names != NULL) {
4888 }
4889 m->effect_names = NULL;
4890}
4891
4899 if (m == NULL) {
4900 return;
4901 }
4902
4904
4905 if (m->n_eff_sets > 0) {
4907 for (GSC_ID_T i = 0; i < m->n_eff_sets; ++i) {
4908 gsc_delete_effect_matrix(&(m->e[i]));
4909 }
4910 GSC_FREE(m->e);
4911 }
4912
4914
4915 if (m->n_labels > 0) {
4916 if (m->label_ids != NULL) {
4917 GSC_FREE(m->label_ids);
4918 }
4919 if (m->label_defaults != NULL) {
4921 }
4922 }
4923
4924 if (m != NULL) {
4925 GSC_FREE(m);
4926 }
4927}
4928
4939 for (GSC_ID_T i = 0; i < b->num_blocks; ++i) {
4941 }
4943 b->markers_in_block = NULL;
4945 b->num_markers_in_block = NULL;
4946 b->num_blocks = 0;
4947
4948 return;
4949}
4950
4951
4966 it->am = NULL;
4967 //it->group = GSC_NO_GROUP;
4968 it->localPos = GSC_NA_LOCALX;
4969 it->cachedAM = NULL;
4970 it->cachedAMIndex = UINT_MAX;
4971 it->atEnd = GSC_TRUE;
4972 it->atStart = GSC_TRUE;
4973}
4974
4987 it->d = NULL;
4988 //it->group = GSC_NO_GROUP;
4989 if (it->cacheSize > 0) {
4990 GSC_FREE(it->cache);
4991 }
4992 it->cache = NULL;
4993 it->cacheSize = 0;
4995 it->groupSize = 0;
4996}
4997
4998/*-------------------------------gsc_SimData loaders-----------------------------*/
4999
5005 FILE* fp;
5006 if ((fp = fopen(filename, "r")) == NULL) {
5007 fprintf(stderr, "Failed to open file %s.\n", filename);
5008 }
5009
5010 gsc_TableFileReader tfr = { .fp = fp,
5011 .buf = { 0 },
5012 .buf_fill = 0,
5013 .cursor = 0,
5014 };
5015
5016 if (fp != NULL) {
5017 tfr.buf_fill = fread(tfr.buf,1,sizeof(tfr.buf),fp);
5018 }
5019 return tfr;
5020}
5021
5025 if (tbl->fp != NULL) { fclose(tbl->fp); }
5026 tbl->fp = NULL;
5027}
5028
5037 tbl->cursor = 0;
5038 if (tbl->fp != NULL) {
5039 tbl->buf_fill = fread(tbl->buf,1,sizeof(tbl->buf),tbl->fp);
5040 } else {
5041 tbl->buf_fill = 0;
5042 }
5043}
5044
5051 if (tbl->buf_fill <= tbl->cursor) {
5052 if (tbl->buf_fill < sizeof(tbl->buf)) { // last read did not fill the entire buffer
5054 }
5056 }
5057
5058 switch (tbl->buf[tbl->cursor]) {
5059 case '\r': // allow '\r' or '\r\n' as end of lines. also allow '\n' as end of line (see following case)
5060 case '\n':
5061 return GSC_TABLEFILE_NEWLINE;
5062 case '\t':
5063 case ' ':
5064 case ',':
5066 default:
5068 }
5069}
5070
5080 if (c->cell_len > 0 && c->isCellShallow) {
5081 char* deepcell = gsc_malloc_wrap(sizeof(char)*(c->cell_len+1), GSC_TRUE);
5082 memcpy(deepcell,c->cell,sizeof(char)*c->cell_len);
5083 deepcell[c->cell_len] = '\0';
5084 c->cell = deepcell;
5086 }
5087}
5088
5095 gsc_TableFileCell cur = { .isCellShallow = GSC_TRUE, .cell = NULL, .cell_len = 0,
5096 .predCol = 0, .predNewline = 0, .eof = GSC_FALSE };
5097
5098 GSC_CREATE_BUFFER(tmpcell,char,1);
5099 size_t tmpix = 0;
5100 size_t tblbuf_offset = 0;
5101 size_t tblbuf_len = 0;
5102 int predCarriageReturn = 0; // for detecting /r/n as a single "newline"
5103 _Bool warned = 0;
5104
5105 while (1) {
5107 if (0 < predCarriageReturn) { --predCarriageReturn; } // decremented each time step
5108
5109 if (0 == cur.cell_len) {
5110 switch (type) {
5112 if (tbl->buf[tbl->cursor] == '\r') {
5113 predCarriageReturn = 2; // will have value 1 at next loop iteration, then will fall back to 0
5114 }
5115 if (!(predCarriageReturn && tbl->buf[tbl->cursor] == '\n')) {
5116 ++cur.predNewline;
5117 }
5118 cur.predCol = 0;
5119 ++tbl->cursor;
5120 break;
5121
5123 ++tbl->cursor;
5124 ++cur.predCol;
5125 break;
5126
5128 // just refill as we have no contents we need to save yet
5130 if (0 < predCarriageReturn) { ++predCarriageReturn; } // should not tick down the counter this loop iteration
5131 break;
5132
5134 tblbuf_offset = tbl->cursor; tblbuf_len = 1; // in case we need to make a deep copy later.
5135 cur.cell = tbl->buf + tbl->cursor;
5136 ++cur.cell_len;
5137 ++tbl->cursor;
5138 break;
5139
5140 default:
5141 ++tbl->cursor;
5142 cur.eof = GSC_TRUE;
5143 return cur;
5144 }
5145
5146 } else { // have found the cell, just need to read the rest of it
5147 switch (type) {
5149 ++tbl->cursor;
5150 ++tblbuf_len;
5151 ++cur.cell_len;
5152 break;
5153
5156
5157 if (!warned && tblbuf_len > 8192) {
5158 warned = 1;
5159 fprintf(stderr,"Warning: very long cell identified beginning %c%c%c%c%c%c. Column separators may have failed to be recognised\n",
5160 tmpcell[0],tmpcell[1],tmpcell[2],tmpcell[3],tmpcell[4],tmpcell[5]);
5161 }
5162
5163 GSC_STRETCH_BUFFER(tmpcell,tmpix + tblbuf_len + 1);
5164 memcpy(tmpcell+tmpix,tbl->buf+tblbuf_offset,sizeof(char)*tblbuf_len);
5165 tmpix += tblbuf_len;
5166 tmpcell[tmpix] = '\0';
5167
5168 tblbuf_offset = 0; tblbuf_len = 0;
5170 break;
5171
5173 ++tbl->cursor;
5174 cur.eof = GSC_TRUE;
5175 // fall through
5176 default: // newline or column gap or end of file discovered: save and return.
5177 if (!cur.isCellShallow) {
5178 cur.cell = gsc_malloc_wrap(sizeof(char)*(cur.cell_len + 1),GSC_TRUE);
5179 memcpy(cur.cell,tmpcell,sizeof(char)*tmpix);
5180 if (0 < tblbuf_len) {
5181 memcpy(cur.cell+tmpix,tbl->buf+tblbuf_offset,sizeof(char)*tblbuf_len);
5182 }
5183 cur.cell[cur.cell_len] = '\0';
5184 GSC_DELETE_BUFFER(tmpcell);
5185 }
5186 return cur;
5187 }
5188 }
5189 }
5190}
5191
5201_Bool gsc_get_index_of_genetic_marker(const char* target,
5202 gsc_KnownGenome g,
5203 GSC_GENOLEN_T* out) {
5204 GSC_GENOLEN_T first = 0, last = g.n_markers - 1;
5205 GSC_GENOLEN_T index = (first + last) / 2;
5206 int comparison = strcmp(target,*(g.names_alphabetical[index]));
5207 while (comparison != 0 && first <= last) {
5208 if (comparison == 0) {
5209 if (out != NULL) *out = g.names_alphabetical[index] - g.marker_names;
5210 return 1;
5211 } else if (comparison > 0) {
5212 first = index + 1;
5213 if (first >= g.n_markers) { return 0; }
5214 } else {
5215 if (index == 0) { return 0; }
5216 last = index - 1;
5217 }
5218
5219 // index has been updated, no matter the branch.
5220 index = (first + last) / 2;
5221 comparison = strcmp(target, *(g.names_alphabetical[index]));
5222 }
5223
5224 if (first > last) {
5225 return 0;
5226 }
5227 if (out != NULL) *out = g.names_alphabetical[index] - g.marker_names;
5228 return 1;
5229}
5230
5241 gsc_TableFileCell** queue,
5242 size_t* queuesize) {
     /*
      * Returns the next table cell, taking it from a caller-held queue of
      * already-read cells when that queue is non-empty.
      *
      * The first line of this signature is not present in this listing; the
      * visible callers invoke the routine as
      * gsc_helper_tablefilereader_get_next_cell_wqueue(&tf, &cellqueue, &queue_size).
      *
      * queue:     address of the caller's pointer to the front of the queue.
      *            Popping advances the caller's pointer by one cell.
      * queuesize: address of the caller's count of queued cells; decremented
      *            on each pop.
      */
5243 gsc_TableFileCell ncell;
5244 if (*queuesize > 0) {
     // `[]` binds tighter than unary `*`, so this is *(queue[0]), i.e. **queue:
     // the cell currently at the front of the queue.
5245 ncell = *queue[0];
5246 /*for (int i = 1; i < *queuesize; ++i) { *queue[i-1] = *queue[i]; }*/
     // Pop by moving the front pointer forward rather than shifting the cells down.
5247 ++*queue;
5248 --*queuesize;
5249 } else {
     // NOTE(review): the body of this branch is not visible in this listing.
     // Presumably it reads a fresh cell from the file reader into ncell; confirm
     // against the upstream file, because ncell is otherwise returned unset.
5251 }
5252 return ncell;
5253}
5254
5286 const char** canonical_titles,
5287 int* col_order,
5288 gsc_TableFileCell* unprocessedqueue,
5289 size_t* queuesize) {
     /*
      * Reads the first cells of a table file and decides whether they form a
      * header row made of the three expected column titles, in any order.
      *
      * The first line of this signature is not present in this listing; the
      * visible callers invoke the routine as
      * gsc_helper_parse_3cell_header(&tf, titles, colnums, cellqueue, &queue_size),
      * so `tf` below is that first argument.
      *
      * canonical_titles: the three titles, in canonical column order.
      * col_order:        on a GSC_TRUE return, col_order[k] is the 1-based file
      *                   column that holds canonical_titles[k]. Not written on
      *                   other returns.
      * unprocessedqueue: storage for the cells read here (see the assumption
      *                   comment below for the required capacity).
      * queuesize:        receives the number of cells left in unprocessedqueue
      *                   for the caller to consume.
      *
      * Returns GSC_NA when three cells could not be read from the first line,
      * GSC_TRUE when the three cells are the canonical titles (those cells are
      * then freed and dropped from the queue), and GSC_FALSE otherwise (the
      * cells stay queued so the caller can parse them as data).
      */
5290 const int ncells = 3;
5291
5292 // assume unprocessedqueue has at least 4 spaces, titles has 3 entries and so does col_order
5293 size_t newest = 0;
5294 size_t onelinefile = GSC_FALSE;
5295 for (; newest < ncells; ++newest) {
5296 unprocessedqueue[newest] = gsc_tablefilereader_get_next_cell(tf);
5297 if ((unprocessedqueue[newest]).eof) {
5298 if (newest + 1 < ncells) {
5299 if (!((unprocessedqueue[newest]).isCellShallow)) { GSC_FREE((unprocessedqueue[newest]).cell); }
5300 *queuesize = newest; // newest does not exist in return group
5301 return GSC_NA;
5302 } else { // column 3 has eof at the end of it. That's okay.
5303 onelinefile = GSC_TRUE;
5304 *queuesize = newest + 1;
5305 }
5306 } else if ((unprocessedqueue[newest]).predNewline) {
5307 *queuesize = newest+1; // newest index included in return group
5308 return GSC_NA;
5309 }
     // Deep-copied so the cell text can be handed to strcmp below and freed with
     // GSC_FREE on the header path (presumably the copy is NUL-terminated and
     // heap-owned; confirm in gsc_tablefilecell_deep_copy).
5310 gsc_tablefilecell_deep_copy(unprocessedqueue + newest);
5311 }
5312 if (!onelinefile) {
     // NOTE(review): unlike the first three cells, this fourth cell is queued
     // without a deep copy.
5313 unprocessedqueue[newest] = gsc_tablefilereader_get_next_cell(tf); // four cell
5314 *queuesize = newest + 1;
5315 }
5316
5317 // Check which ordering of the three titles it is.
     // i1 is the file column holding title 0. For each i1, inc = 1 and inc = 2
     // try the two possible assignments of the remaining two columns to titles 1 and 2.
5318 for (int i1 = 0; i1 < ncells; ++i1) {
5319 if (strcmp((unprocessedqueue[i1]).cell,canonical_titles[0]) == 0) {
5320 for (int inc = 1; inc < ncells; ++inc) {
5321 int i2 = (i1 + inc) % ncells;
5322 int i3 = (i1 + (ncells - inc)) % ncells;
5323 if (strcmp((unprocessedqueue[i2]).cell,canonical_titles[1]) == 0 &&
5324 strcmp((unprocessedqueue[i3]).cell,canonical_titles[2]) == 0) {
5325 col_order[0] = i1 + 1;
5326 col_order[1] = i2 + 1;
5327 col_order[2] = i3 + 1;
5328 GSC_FREE((unprocessedqueue[0]).cell);
5329 GSC_FREE((unprocessedqueue[1]).cell);
5330 GSC_FREE((unprocessedqueue[2]).cell);
     // NOTE(review): when onelinefile is set, slot 3 was never filled by this
     // function, yet it is moved to the front and reported as one queued cell.
     // Verify that callers zero-initialise the queue storage and tolerate this.
5331 unprocessedqueue[0] = unprocessedqueue[3];
5332 *queuesize = 1;
5333 return GSC_TRUE;
5334 }
5335 }
5336 }
5337 }
5338
5339 return GSC_FALSE;
5340}
5341
/*
 * Parses a three-column genetic map file into an array of gsc_MapfileUnit.
 *
 * The expected columns are titled "marker", "chr" and "pos". A header row with
 * those titles (in any order) fixes the column order; without a header the
 * order marker, chr, pos is used. A row is kept only if every cell in it
 * parsed successfully.
 *
 * filename: path of the file to read. NULL yields a return value of 0.
 * out:      receives the array of parsed rows via GSC_FINALISE_BUFFER. Each
 *           kept row's `name` is the marker-name string taken from the file
 *           cell; the caller holds the handles afterwards (see the comment at
 *           the end of the function).
 *
 * Returns the number of rows stored in *out, or 0 if filename is NULL or the
 * first line does not contain three columns.
 *
 * NOTE(review): the declarations of `tf`, `buffer` and `buffercap` used below
 * are not visible in this listing (some source lines are missing from it).
 */
5375static size_t gsc_helper_parse_mapfile(const char* filename, struct gsc_MapfileUnit** out) {
5376 if (filename == NULL) return 0;
5377
5379
5380 size_t row = 1;
5381 size_t col = 1;
5382
     // Up to four cells may be consumed while sniffing for a header; they are
     // replayed from this queue before further cells are read from the file.
5383 gsc_TableFileCell cellsread[4] = { 0 };
5384 gsc_TableFileCell* cellqueue = cellsread;
5385 size_t queue_size;
5386 const char* titles[] = { "marker", "chr", "pos"};
5387 int colnums[] = { 1, 2, 3 };
5388 GSC_LOGICVAL header = gsc_helper_parse_3cell_header(&tf, titles, colnums, cellqueue, &queue_size);
5389 if (header == GSC_TRUE) {
5390 printf("(Loading %s) Format: map file with header\n", filename);
5391 } else if (header == GSC_FALSE) {
5392 printf("(Loading %s) Format: map file without header\n", filename);
5393 } else {
5394 printf("(Loading %s) Failure: Cannot identify the expected 3 columns of the map file\n", filename);
5396 return 0;
5397 }
5398 int marker_colnum = colnums[0], chr_colnum = colnums[1], pos_colnum = colnums[2];
5399
5400 _Bool goodrow = (header) ? 0 : 1; // discard first row if it's a header, keep if it's not.
5401 size_t goodrow_counter = 0;
5402
     // Values parsed so far for the row currently being read.
5403 char* marker = NULL;
5404 unsigned long chr = 0;
5405 double pos = 0;
5406 char* conversionflag;
5407
5409
5410 gsc_TableFileCell ncell;
5411 do {
5412 ncell = gsc_helper_tablefilereader_get_next_cell_wqueue(&tf, &cellqueue, &queue_size);
5413
5414 // Update row/col position and save predecessor row
5415 if (ncell.cell != NULL) {
     // A cell preceded by a newline starts a new row, so the previous row is
     // complete: commit it if it was good, otherwise release its marker name.
5416 if (ncell.predNewline) {
5417 if (goodrow) { // save predecessor row
5418 buffer[goodrow_counter].name = marker;
5419 buffer[goodrow_counter].chr = chr;
5420 buffer[goodrow_counter].pos = pos;
5421
5422 ++goodrow_counter;
5423 if (goodrow_counter >= buffercap) {
5424 GSC_STRETCH_BUFFER(buffer,2*row);
5425 }
5426 marker = NULL;
5427 } else if (marker != NULL) {
5428 GSC_FREE(marker);
5429 }
5430 row += ncell.predNewline;
5431 goodrow = 1;
5432 col = 1;
5433 }
     // Any number of column separators before the cell advances by one column.
5434 col += (ncell.predCol > 0) ? 1 : 0;
5435
5436 // Parse this cell
     // NOTE(review): the next condition is `} if`, not `} else if`, so a
     // zero-length cell marks the row bad and then still goes through the
     // column dispatch below. Confirm whether an `else` was intended.
5437 if (ncell.cell_len == 0) {
5438 goodrow = 0;
5439 } if (col == marker_colnum) {
     // The row keeps this cell's text as the marker name. Flagging the cell as
     // shallow stops the cleanup at the bottom of the loop from freeing it.
     // (The statement preceding this assignment in the upstream file is not
     // visible in this listing.)
5441 marker = ncell.cell;
5442 ncell.isCellShallow = GSC_TRUE; // so it isn't freed.
5443
5444 } else if (col == chr_colnum) {
     // Temporarily NUL-terminate the cell in place so strtoul stops at its end,
     // then restore the overwritten byte. Base 36 accepts digits and letters.
5445 char tmp = ncell.cell[ncell.cell_len]; ncell.cell[ncell.cell_len] = '\0';
5446 chr = strtoul(ncell.cell,&conversionflag,36);
5447 ncell.cell[ncell.cell_len] = tmp;
5448 if (conversionflag != ncell.cell + ncell.cell_len) { // unsuccessful read
5449 //fprintf(stderr,"Entry at row %i column %i of file %s could not be parsed as an integer or alphanumeric string\n", row, chr_colnum, filename);
5450 goodrow = 0;
5451 }
5452
5453 } else if (col == pos_colnum) {
     // Same in-place termination trick; the whole cell must parse as a double.
5454 char tmp = ncell.cell[ncell.cell_len]; ncell.cell[ncell.cell_len] = '\0';
5455 pos = strtod(ncell.cell,&conversionflag);
5456 ncell.cell[ncell.cell_len] = tmp;
5457 if (conversionflag != ncell.cell + ncell.cell_len) { // unsuccessful read
5458 goodrow = 0;
5459 //fprintf(stderr,"Entry at row %i column %i of file %s could not be parsed as a numeric value\n", row, pos_colnum, filename);
5460 }
5461
5462 } else {
     // A cell in any column other than the three expected ones invalidates the row.
5463 goodrow = 0;
5464 }
5465
5466 // Reset to get next cell.
5467 if (!ncell.isCellShallow) { GSC_FREE(ncell.cell); }
5468 }
5469 } while (!ncell.eof);
5470
     // The last row has no following cell to trigger the commit above, so it is
     // committed here when the file ended on the third column.
     // NOTE(review): the pending `marker` is only released inside this
     // `col == 3` block; if the file ends on a row with fewer or more columns
     // while `marker` is set, no visible code frees it. Also, no capacity check
     // precedes this final write into `buffer`.
5471 if (col == 3) {
5472 if (goodrow) { // save predecessor row
5473 buffer[goodrow_counter].name = marker;
5474 buffer[goodrow_counter].chr = chr;
5475 buffer[goodrow_counter].pos = pos;
5476
5477 ++goodrow_counter;
5478 marker = NULL;
5479 } else if (marker != NULL) {
5480 GSC_FREE(marker);
5481 }
5482 }
5483 //row -= ncell.predNewline; // don't count trailing newlines in stats.
5484
     // Failed lines = rows seen, minus the header row if there was one, minus rows kept.
5485 printf("(Loading %s) %u marker(s) with map positions were loaded. Failed to parse %u line(s).\n", filename, (unsigned int) goodrow_counter, (unsigned int) (row - header - goodrow_counter));
5487
5488 // Check outputs. We don't delete the buffers because we want to leave them alive with our callers holding the handles.
5489 GSC_FINALISE_BUFFER(buffer,*out,goodrow_counter);
5490 return goodrow_counter;
5491}
5492
5493
5511 GSC_GENOLEN_T n_markers_in_list,
5512 struct gsc_MapfileUnit** markerlist) {
     /*
      * Filters a marker list in place, keeping only entries whose name is known
      * to the genome `g`.
      *
      * The first line of this signature is not present in this listing;
      * gsc_load_mapfile appears to call this routine as
      * gsc_helper_str_markerlist_leftjoin(d->genome, nrows, &mapcontents), so
      * `g` below is that first argument.
      *
      * n_markers_in_list: number of entries in *markerlist.
      * markerlist:        address of the array to filter. Kept entries are
      *                    moved down so they occupy indexes [0, return value),
      *                    preserving their relative order.
      *
      * Returns the number of entries kept.
      *
      * Entries whose name is NULL are skipped (neither kept nor freed). The name
      * of a non-matching entry is freed, but its pointer is not reset, and slots
      * at or beyond the returned count may hold stale copies of kept entries.
      * Callers should therefore only use the first `return value` entries.
      */
5513 struct gsc_MapfileUnit* rlist = *markerlist;
5514 GSC_GENOLEN_T n_joined = 0;
5515 /*size_t consecutivity_bias; // we cache the index of the last name we found and pre-check whether the next marker
5516 // in the list is the next marker in the genome. For the case where people organise their genotype file and genetic
5517 // map file in the same order. Edit: decided this is not likely enough a situation to build this in.*/
5518
5519 for (GSC_GENOLEN_T i = 0; i < n_markers_in_list; ++i) {
5520 if (rlist[i].name != NULL) {
     // nameix is required by the lookup's signature; only the found/not-found
     // result is used here.
5521 GSC_GENOLEN_T nameix;
5522 if (gsc_get_index_of_genetic_marker(rlist[i].name, g, &nameix)) {
     // Compact: copy down only once at least one earlier entry has been dropped.
5523 if (n_joined != i) {
5524 rlist[n_joined] = rlist[i];
5525 }
5526 n_joined++;
5527
5528 } else { // discard this marker. n_joined lags behind i by one more step.
5529 GSC_FREE(rlist[i].name);
5530
5531 }
5532 }
5533 }
5534
5535 return n_joined;
5536}
5537
5538
5542 GSC_ID_T newmapindex = 0;
5543 if (d->genome.n_maps > 0) {
5544 newmapindex = d->genome.n_maps;
5545
5546 gsc_MapID* tmpMapIDs = gsc_malloc_wrap(sizeof(gsc_MapID)*(newmapindex+1),GSC_TRUE);
5547 memcpy(tmpMapIDs,d->genome.map_ids,sizeof(gsc_MapID)*newmapindex);
5549 d->genome.map_ids = tmpMapIDs;
5550
5551 gsc_RecombinationMap* tmpMaps = gsc_malloc_wrap(sizeof(gsc_RecombinationMap)*(newmapindex+1),GSC_TRUE);
5552 memcpy(tmpMaps,d->genome.maps,sizeof(gsc_RecombinationMap)*newmapindex);
5553 GSC_FREE(d->genome.maps);
5554 d->genome.maps = tmpMaps;
5555
5556 } else {
5559 }
5560 d->genome.map_ids[newmapindex] = gsc_get_new_map_id(d);
5561 d->genome.n_maps++;
5562 d->genome.maps[newmapindex] = map;
5563
5564 return d->genome.map_ids[newmapindex];
5565}
5566
5570 GSC_ID_T neweffsetindex = 0;
5571 if (d->n_eff_sets > 0) {
5572 neweffsetindex = d->n_eff_sets;
5573
5574 gsc_EffectID* tmpIDs = gsc_malloc_wrap(sizeof(gsc_EffectID)*(neweffsetindex+1),GSC_TRUE);
5575 memcpy(tmpIDs,d->eff_set_ids,sizeof(gsc_EffectID)*neweffsetindex);
5577 d->eff_set_ids = tmpIDs;
5578
5579 gsc_EffectMatrix* tmpMats = gsc_malloc_wrap(sizeof(gsc_EffectMatrix)*(neweffsetindex+1),GSC_TRUE);
5580 memcpy(tmpMats,d->e,sizeof(gsc_EffectMatrix)*neweffsetindex);
5581 GSC_FREE(d->e);
5582 d->e = tmpMats;
5583
5584 } else {
5587 }
5588 d->eff_set_ids[neweffsetindex] = gsc_get_new_eff_set_id(d);
5589 d->n_eff_sets++;
5590 d->e[neweffsetindex] = effset;
5591
5592 return d->eff_set_ids[neweffsetindex];
5593}
5594
5601static void gsc_helper_sort_markerlist(GSC_GENOLEN_T n_markers, struct gsc_MapfileUnit* markerlist) {
5602 if (n_markers < 2) { return; }
5603
5604 // sort by linkage group
5605 qsort(markerlist,n_markers,sizeof(*markerlist),gsc_helper_mapfileunit_ascending_chr_comparer);
5606
5607 // sort each linkage group by pos
5608 //int n_chr = 1;
5609 GSC_GENOLEN_T chr_start = 0;
5610 unsigned long current_chr = markerlist[0].chr;
5611
5612 for (GSC_GENOLEN_T i = 1; i < n_markers; ++i) {
5613 if (markerlist[i].chr != current_chr) { // found end of current chr
5614 //n_chr++;
5615 qsort(markerlist + chr_start, i - chr_start,
5616 sizeof(*markerlist), gsc_helper_mapfileunit_ascending_d_comparer);
5617
5618 chr_start = i;
5619 current_chr = markerlist[i].chr;
5620 }
5621 }
5622
5623 qsort(markerlist + chr_start, n_markers - chr_start,
5624 sizeof(*markerlist), gsc_helper_mapfileunit_ascending_d_comparer);
5625 //return n_chr;
5626}
5627
5642 GSC_GENOLEN_T n_markers,
5643 struct gsc_MapfileUnit* markerlist) {
     /*
      * Builds a recombination map from a marker list and, judging by the caller,
      * registers it with the simulation.
      *
      * The first line of this signature and the final return statement are not
      * present in this listing; gsc_load_mapfile appears to call this routine as
      * gsc_create_recombmap_from_markerlist(d, nrows, mapcontents) and stores the
      * result in a gsc_MapID, so `d` below is that first argument.
      *
      * n_markers:  number of entries in markerlist. 0 yields NO_MAP.
      * markerlist: rows of (name, chr, pos). The code treats each run of equal
      *             `chr` values as one linkage group, so the list is presumably
      *             expected to be sorted by chr and then by pos. Verify with callers.
      *
      * Each linkage group becomes either
      *   - GSC_LINKAGEGROUP_SIMPLE:  its markers sit in d->genome.marker_names in
      *     the same consecutive order, so only the first genome index is stored; or
      *   - GSC_LINKAGEGROUP_REORDER: an explicit genome index is stored per marker.
      * Groups in which no marker matched the genome are dropped. If every group
      * is dropped, NO_MAP is returned.
      */
5644 if (n_markers == 0) return NO_MAP;
5645
     // Phase 1: count how many entries each run of equal `chr` contains.
     // Entries with a NULL name (other than entry 0) are not counted.
5646 GSC_CREATE_BUFFER(chr_nmembers,GSC_GENOLEN_T,40);
5647 memset(chr_nmembers,0,sizeof(*chr_nmembers)*40);
5648 chr_nmembers[0] = 1;
5649 GSC_GENOLEN_T n_chr = 1;
5650 unsigned long current_chr = markerlist[0].chr;
5651 for (GSC_GENOLEN_T i = 1; i < n_markers; ++i) {
5652 while (i < n_markers && markerlist[i].name == NULL) {
5653 ++i;
5654 }
     // NOTE(review): if the list ends with NULL-named entries, the loop above
     // leaves i == n_markers, and the read of markerlist[i] below is then one
     // past the last entry. A `if (i >= n_markers) break;` guard looks missing.
5655 if (current_chr != markerlist[i].chr) {
5656 // First of next
5657 if (n_chr >= chr_nmemberscap) {
     // Double the capacity and zero the newly added upper half.
5658 GSC_STRETCH_BUFFER(chr_nmembers,2*n_chr);
5659 memset(chr_nmembers+n_chr,0,sizeof(*chr_nmembers)*n_chr);
5660 }
5661 ++n_chr;
5662 current_chr = markerlist[i].chr;
5663 chr_nmembers[n_chr-1] = 1;
5664 } else {
5665 ++(chr_nmembers[n_chr-1]);
5666 }
5667 }
5668
5669 gsc_RecombinationMap map = {.n_chr=n_chr, .chrs=gsc_malloc_wrap(sizeof(gsc_LinkageGroup) * n_chr, GSC_TRUE) };
5670
5671 // Populate the map. Each chr/linkage group may be "Simple" or "Reordered"
5672 GSC_GENOLEN_T could_not_match = 0;
5673 GSC_GENOLEN_T current_marker = 0;
5674 GSC_GENOLEN_T first_marker;
5675 GSC_GENOLEN_T n_bad_chr = 0;
5676 for (GSC_GENOLEN_T chr_ix = 0; chr_ix < map.n_chr; ++chr_ix) {
5677 first_marker = current_marker;
     // Length of the group: position of its last entry minus position of its first.
     // NOTE(review): chr_nmembers is used here as a contiguous span length, but
     // phase 1 skipped NULL-named entries when counting. If the list can contain
     // NULL names, the span and the count disagree. Confirm NULL names cannot occur.
     // NOTE(review): chrdist is 0 when the group has a single position, and the
     // divisions by chrdist below are then 0/0. Confirm how single-marker groups
     // should be handled.
5678 double chrdist = markerlist[first_marker + chr_nmembers[chr_ix] - 1].pos - markerlist[first_marker].pos;
5679 double* lgdists = gsc_malloc_wrap(sizeof(double)*(chr_nmembers[chr_ix]),GSC_TRUE);
5680
5681 char found_first = GSC_FALSE;
5682 // n_goodmembers == 0 is a guard on firsts_coord_in_genome, but we
5683 // still initialise it here (to a value too high to be reasonable)
5684 // because the compiler can't tell that.
5685 GSC_GENOLEN_T firsts_coord_in_genome = d->genome.n_markers;
5686 GSC_GENOLEN_T n_goodmembers = 0;
     // Stays NULL for as long as the group still qualifies as "simple".
5687 GSC_GENOLEN_T* marker_coords = NULL;
5688
5689 GSC_GENOLEN_T endpt = first_marker + chr_nmembers[chr_ix];
5690 for (; current_marker < endpt; ++current_marker) { // simple recombination map, if possible
5691 if (markerlist[current_marker].name == NULL) {
5692 continue;
5693 }
5694
5695 if (!found_first) {
     // Anchor the group on the first marker the genome recognises.
5696 GSC_GENOLEN_T coord;
5697 if (!gsc_get_index_of_genetic_marker(markerlist[current_marker].name, d->genome, &coord)) {
5698 could_not_match++;
5699 } else {
5700 found_first = GSC_TRUE;
     // Distances are measured from this anchor, so its own entry is 0 / chrdist.
5701 first_marker = current_marker;
5702 firsts_coord_in_genome = coord;
5703 lgdists[n_goodmembers] = (markerlist[current_marker].pos - markerlist[first_marker].pos) / chrdist;
5704 n_goodmembers++;
5705 }
5706 } else if (firsts_coord_in_genome + n_goodmembers < d->genome.n_markers &&
5707 strcmp(markerlist[current_marker].name, d->genome.marker_names[firsts_coord_in_genome + n_goodmembers]) == 0) {
5708 // we are a simple linkage group still so far.
5709 lgdists[n_goodmembers] = (markerlist[current_marker].pos - markerlist[first_marker].pos) / chrdist;
5710 n_goodmembers++;
5711 } else {
5712 // Just discovered we are a reordered linkage group. Copy over the marker indexes that were as expected.
5713 marker_coords = gsc_malloc_wrap(sizeof(*marker_coords)*(chr_nmembers[chr_ix]),GSC_TRUE);
5714 for (GSC_GENOLEN_T backfill = 0; backfill < n_goodmembers; ++backfill) {
5715 marker_coords[backfill] = firsts_coord_in_genome + backfill;
5716 }
     // The break skips the loop increment, so the marker that broke the
     // consecutive pattern is processed again by the loop below.
5717 break;
5718 }
5719
5720 }
5721 for (; current_marker < endpt; ++current_marker) { // reordered recombination map, if previous failed.
5722 if (markerlist[current_marker].name == NULL) {
5723 continue;
5724 }
5725
5726 GSC_GENOLEN_T coord;
5727 if (!gsc_get_index_of_genetic_marker(markerlist[current_marker].name, d->genome, &coord)) {
5728 ++could_not_match;
5729 } else {
5730 marker_coords[n_goodmembers] = coord;
5731 lgdists[n_goodmembers] = (markerlist[current_marker].pos - markerlist[first_marker].pos) / chrdist;
5732 ++n_goodmembers;
5733 }
5734 }
5735
     // Store the group. Dropped groups shift every later group down by n_bad_chr slots.
     // NOTE(review): when n_goodmembers == 0, lgdists is neither stored in the
     // map nor freed in the visible code. This looks like a leak per dropped group.
5736 if (n_goodmembers == 0) { // || firsts_coord_in_genome >= d->genome.n_markers) {
5737 n_bad_chr++;
5738 } else if (marker_coords == NULL) {
5739 GSC_GENOLEN_T chr_ix_actual = chr_ix-n_bad_chr;
5740 map.chrs[chr_ix_actual].type = GSC_LINKAGEGROUP_SIMPLE;
     // Presumably positions are in centimorgans, making chrdist / 100 a length
     // in Morgans. TODO confirm the unit expected in map files.
5741 map.chrs[chr_ix_actual].map.simple.expected_n_crossovers = chrdist / 100;
5742 map.chrs[chr_ix_actual].map.simple.n_markers = n_goodmembers;
5743 map.chrs[chr_ix_actual].map.simple.first_marker_index = firsts_coord_in_genome;
5744 map.chrs[chr_ix_actual].map.simple.dists = lgdists;
5745 } else {
5746 GSC_GENOLEN_T chr_ix_actual = chr_ix-n_bad_chr;
5747 map.chrs[chr_ix_actual].type = GSC_LINKAGEGROUP_REORDER;
5748 map.chrs[chr_ix_actual].map.reorder.expected_n_crossovers = chrdist / 100;
5749 map.chrs[chr_ix_actual].map.reorder.n_markers = n_goodmembers;
5750 map.chrs[chr_ix_actual].map.reorder.marker_indexes = marker_coords;
5751 map.chrs[chr_ix_actual].map.reorder.dists = lgdists;
5752 }
5753 }
5754 GSC_DELETE_BUFFER(chr_nmembers);
5755 map.n_chr = map.n_chr-n_bad_chr;
5756 if (map.n_chr == 0) {
5757 GSC_FREE(map.chrs);
5758 return NO_MAP;
5759 }
     // NOTE(review): the return statement for the success path is not visible in
     // this listing. `could_not_match` is counted above but not used in any
     // visible line.
5761}
5762
5763
5780 GSC_GENOLEN_T n_markers,
5781 char** markernames,
5782 double expected_n_recombinations) {
5784
5785 if (markernames == NULL) {
5786 if (d->genome.n_markers == 0) return NO_MAP;
5787
5788 double* lgdists = gsc_malloc_wrap(sizeof(double)*d->genome.n_markers,GSC_TRUE);
5789 double lgdist = 1./d->genome.n_markers;
5790 for (GSC_GENOLEN_T i = 0; i < d->genome.n_markers; ++i) { lgdists[i] = lgdist; }
5791
5792 map.chrs[0].type = GSC_LINKAGEGROUP_SIMPLE;
5793 map.chrs[0].map.simple.expected_n_crossovers = expected_n_recombinations;
5795 map.chrs[0].map.simple.first_marker_index = 0;
5796 map.chrs[0].map.simple.dists = lgdists;
5797 } else {
5798 if (n_markers == 0) return NO_MAP;
5799
5800 // markernames could still be simple or reordered compared to the d->genome, so need to check that first.
5801 _Bool found_first = 0;
5802 GSC_GENOLEN_T could_not_match;
5803 GSC_GENOLEN_T firsts_coord_in_genome = d->genome.n_markers;
5804 GSC_GENOLEN_T chrmarker_ix = 0;
5805
5806 GSC_GENOLEN_T* marker_coords = NULL;
5807 for (GSC_GENOLEN_T i = 0; i < n_markers; ++i) {
5808 if (!found_first || marker_coords != NULL) {
5809 // We are first or we are a reordered linkage group. Find what index in the genome the next marker is stored at.
5810 GSC_GENOLEN_T coord;
5811
5812 if (markernames[i] == NULL) {
5813 could_not_match++;
5814 } else if (!gsc_get_index_of_genetic_marker(markernames[i], d->genome, &coord )) {
5815 could_not_match++;
5816 } else if (!found_first) {
5817 found_first = 1;
5818 firsts_coord_in_genome = coord;
5819 chrmarker_ix++;
5820 } else { // must be the case that we have marker_coords != NULL and are a reordered linkage group
5821 marker_coords[chrmarker_ix] = coord;
5822 chrmarker_ix++;
5823 }
5824
5825 } else if (firsts_coord_in_genome < d->genome.n_markers &&
5826 strcmp(markernames[i], d->genome.marker_names[firsts_coord_in_genome + i]) == 0) {
5827 // are a simple linkage group still so far.
5828 chrmarker_ix++;
5829
5830 } else {
5831 // Just discovered we are a reordered linkage group. Copy over the marker indexes that were as expected.
5832 marker_coords = gsc_malloc_wrap(sizeof(*marker_coords)*n_markers,GSC_TRUE);
5833 for (GSC_GENOLEN_T backfill = 0; backfill < chrmarker_ix; ++backfill) {
5834 marker_coords[backfill] = firsts_coord_in_genome + backfill;
5835 }
5836
5837 if (markernames[i] == NULL) {
5838 could_not_match++;
5839 } else if (!gsc_get_index_of_genetic_marker(markernames[i], d->genome, &(marker_coords[chrmarker_ix]) )) {
5840 could_not_match++;
5841 } else {
5842 chrmarker_ix++;
5843 }
5844 }
5845 }
5846
5847 double* lgdists = gsc_malloc_wrap(sizeof(double)*chrmarker_ix,GSC_TRUE);
5848 double lgdist = 1./n_markers;
5849 for (GSC_GENOLEN_T i = 0; i < chrmarker_ix; ++i) { lgdists[i] = lgdist; }
5850
5851 if (marker_coords == NULL) {
5852 map.chrs[0].type = GSC_LINKAGEGROUP_SIMPLE;
5853 map.chrs[0].map.simple.expected_n_crossovers = expected_n_recombinations;
5854 map.chrs[0].map.simple.n_markers = chrmarker_ix;
5855 map.chrs[0].map.simple.first_marker_index = firsts_coord_in_genome;
5856 map.chrs[0].map.simple.dists = lgdists;
5857 } else {
5858 map.chrs[0].type = GSC_LINKAGEGROUP_REORDER;
5859 map.chrs[0].map.reorder.expected_n_crossovers = expected_n_recombinations;
5860 map.chrs[0].map.reorder.n_markers = chrmarker_ix;
5861 map.chrs[0].map.reorder.marker_indexes = marker_coords;
5862 map.chrs[0].map.reorder.dists = lgdists;
5863 }
5864 }
5865
5867}
5868
5906gsc_MapID gsc_load_mapfile(SimData* d, const char* filename) {
5907 if (filename == NULL) return NO_MAP;
5908
5909 struct gsc_MapfileUnit* mapcontents = NULL;
5910 size_t nrows = gsc_helper_parse_mapfile(filename,&mapcontents);
5911 if (nrows == 0 || mapcontents == NULL) {
5912 if (mapcontents != NULL) {
5913 GSC_FREE(mapcontents);
5914 }
5915 return NO_MAP;
5916 }
5917
5918 _Bool freeMapNames = 1;
5919 if (d->genome.n_markers > 0) {
5920 // if genome is already set, leftjoin on those markers.
5921 GSC_GENOLEN_T new_nrows = gsc_helper_str_markerlist_leftjoin(d->genome, nrows, &mapcontents);
5922 if (new_nrows < nrows) {
5923 printf("Discarded %lu markers when loading map %s because they do not appear in the primary map.\n", (long unsigned int) (nrows - new_nrows), filename);
5924 }
5925 nrows = new_nrows;
5926 gsc_helper_sort_markerlist(nrows,mapcontents);
5927 } else {
5928 // else set up the list of markers tracked by the simulation
5929 gsc_helper_sort_markerlist(nrows,mapcontents);
5930 d->genome = (gsc_KnownGenome){
5931 .n_markers = nrows,
5932 .marker_names = gsc_malloc_wrap(sizeof(char**)*nrows,GSC_TRUE),
5933 .names_alphabetical = gsc_malloc_wrap(sizeof(char**)*nrows,GSC_TRUE),
5934 .n_maps = 0,
5935 .map_ids = NULL,
5936 .maps = NULL
5937 };
5938 for (GSC_GENOLEN_T i = 0; i < d->genome.n_markers; ++i) {
5939 d->genome.marker_names[i] = mapcontents[i].name;
5940 d->genome.names_alphabetical[i] = &(d->genome.marker_names[i]);
5941 }
5942 qsort(d->genome.names_alphabetical,d->genome.n_markers,sizeof(*d->genome.names_alphabetical),gsc_helper_indirect_alphabetical_str_comparer);
5943
5944 freeMapNames = 0;
5945 //printf( "Warning: loading genetic map before loading any founder genotypes. Many simulation operations will not yet run.\n");
5946 }
5947
5948 gsc_MapID map = gsc_create_recombmap_from_markerlist(d, nrows, mapcontents);
5949 if (freeMapNames) {
5950 for (size_t i = 0; i < nrows; ++i) {
5951 GSC_FREE(mapcontents[i].name);
5952 }
5953 }
5954 GSC_FREE(mapcontents);
5955
5956 return map;
5957}
5958
5959
5996 if (filename == NULL) return GSC_NO_EFFECTSET;
5997 if (d->genome.n_markers == 0) return GSC_NO_EFFECTSET;
5998
6000
6001 size_t row = 1;
6002 size_t col = 1;
6003
6004 gsc_TableFileCell cellsread[4] = { 0 };
6005 gsc_TableFileCell* cellqueue = cellsread;
6006 const char* titles[] = { "marker", "allele", "eff"};
6007 int colnums[] = { 1, 2, 3 };
6008 size_t queuesize;
6009 GSC_LOGICVAL header = gsc_helper_parse_3cell_header(&tf, titles, colnums, cellqueue, &queuesize);
6010 if (header == GSC_TRUE) {
6011 printf("(Loading %s) Format: effect file with header\n", filename);
6012 } else if (header == GSC_FALSE) {
6013 printf("(Loading %s) Format: effect file without header\n", filename);
6014 } else {
6015 printf("(Loading %s) Failure: Cannot identify the expected 3 columns of the effect file\n", filename);
6017 return NO_EFFECTSET;
6018 }
6019 int marker_colnum = colnums[0], allele_colnum = colnums[1], eff_colnum = colnums[2];
6020
6021
6022 _Bool goodrow = (header) ? 0 : 1; // discard first row if it's a header, keep if it's not.
6023 size_t goodrow_counter = 0;
6024 GSC_ID_T allele_counter = 0;
6025
6026 GSC_CREATE_BUFFER(effset_alleles,char,2);
6027 GSC_CREATE_BUFFER(effset_rows,double*,2);
6028 GSC_ID_T n_effset_rows = 0;
6029
6030 GSC_GENOLEN_T markerix;
6031 char allele = '\0';
6032 GSC_ID_T alleleix = n_effset_rows + 1; // invalid value.
6033 double effect = 0;
6034 char* conversionflag;
6035
6036 gsc_TableFileCell ncell;
6037
6038 do {
6039 ncell = gsc_helper_tablefilereader_get_next_cell_wqueue(&tf, &cellqueue, &queuesize);
6040
6041 if (ncell.cell != NULL) { // so that we can cope with missing final newline
6042 // Update row/col position and save predecessor row
6043 if (ncell.predNewline) {
6044 if (goodrow && col >= 3) { // save predecessor row.
6045 if (alleleix == n_effset_rows) {
6046 ++allele_counter;
6047 ++n_effset_rows;
6048 if (effset_allelescap < n_effset_rows) {
6049 GSC_STRETCH_BUFFER(effset_alleles,2*n_effset_rows);
6050 GSC_STRETCH_BUFFER(effset_rows,2*n_effset_rows);
6051 }
6052 effset_alleles[alleleix] = allele;
6053 effset_rows[alleleix] = gsc_malloc_wrap(sizeof(double)*d->genome.n_markers,GSC_TRUE);
6054 memset(effset_rows[alleleix],0,sizeof(double)*d->genome.n_markers);
6055 }
6056 if (alleleix < n_effset_rows) { // this is specifically not an "else if"
6057 effset_rows[alleleix][markerix] = effect;
6058 ++goodrow_counter;
6059 }
6060 }
6061 row += ncell.predNewline;
6062 goodrow = 1;
6063 col = 1;
6064 }
6065 col += (ncell.predCol > 0) ? 1 : 0; // multiple column spacers treated as one
6066
6067 // Parse this cell
6068 if (ncell.cell_len == 0) {
6069 goodrow = 0;
6070 } else if (col == marker_colnum) {
6071 char tmp = ncell.cell[ncell.cell_len]; ncell.cell[ncell.cell_len] = '\0';
6072 _Bool validmarker = gsc_get_index_of_genetic_marker(ncell.cell,d->genome,&markerix);
6073 ncell.cell[ncell.cell_len] = tmp;
6074 if (!validmarker) {
6075 goodrow = 0;
6076 //fprintf(stderr,"Entry at row %i column %i of file %s does not match the name of a tracked marker\n", row, marker_colnum, filename);
6077 }
6078
6079 } else if (col == allele_colnum) {
6080 if (ncell.cell_len > 1) {
6081 goodrow = 0;
6082 //fprintf(stderr,"Entry at row %i column %i of file %s was too long to represent a single allele\n", row, allele_colnum, filename);
6083 }
6084 allele = ncell.cell[0];
6085 for (alleleix = 0; alleleix < n_effset_rows; ++alleleix) {
6086 if (effset_alleles[alleleix] == allele) {
6087 break;
6088 }
6089 } // leave this loop with alleleix set to match the allele, or equal to n_effset_rows if it's a new allele.
6090
6091 } else if (col == eff_colnum) {
6092 char tmp = ncell.cell[ncell.cell_len]; ncell.cell[ncell.cell_len] = '\0';
6093 effect = strtod(ncell.cell,&conversionflag);
6094 ncell.cell[ncell.cell_len] = tmp;
6095 if (conversionflag != ncell.cell + ncell.cell_len) { // unsuccessful read
6096 goodrow = 0;
6097 //fprintf(stderr,"Entry at row %i column %i of file %s could not be parsed as a numeric value\n", row, eff_colnum, filename);
6098 }
6099
6100 } else {
6101 goodrow = 0;
6102 }
6103
6104 // Reset
6105 if (!ncell.isCellShallow) { GSC_FREE(ncell.cell); }
6106 }
6107 } while (!ncell.eof);
6108
6109 if (col == 3 && goodrow) { // save predecessor row
6110 ++goodrow_counter;
6111 if (alleleix == n_effset_rows) {
6112 ++allele_counter;
6113 ++n_effset_rows;
6114 if (effset_allelescap < n_effset_rows) {
6115 GSC_STRETCH_BUFFER(effset_alleles,2*n_effset_rows);
6116 GSC_STRETCH_BUFFER(effset_rows,2*n_effset_rows);
6117 }
6118 effset_alleles[alleleix] = allele;
6119 effset_rows[alleleix] = gsc_malloc_wrap(sizeof(double)*d->genome.n_markers,GSC_TRUE);
6120 memset(effset_rows[alleleix],0,sizeof(double)*d->genome.n_markers);
6121 }
6122 effset_rows[alleleix][markerix] = effect;
6123 }
6124
6125 printf("(Loading %s) %lu effect value(s) spanning %lu allele(s) were loaded. Failed to parse %lu line(s).\n",
6126 filename, (long unsigned int) goodrow_counter, (long unsigned int) allele_counter, (long unsigned int) (row - header - goodrow_counter));
6128
6129 if (n_effset_rows > 0) {
6130 gsc_EffectMatrix effset = { 0 };
6131 GSC_FINALISE_BUFFER(effset_alleles, effset.effect_names, n_effset_rows);
6132 GSC_FINALISE_BUFFER(effset_rows, effset.effects.matrix, n_effset_rows);
6133 effset.effects.rows = n_effset_rows;
6134 effset.effects.cols = d->genome.n_markers;
6136 } else {
6137 GSC_DELETE_BUFFER(effset_alleles);
6138 GSC_DELETE_BUFFER(effset_rows);
6139 return GSC_NO_EFFECTSET;
6140 }
6141}
6142
// Classifies the text of one table cell `c` by its length and characters.
// NOTE(review): the signature line and the statement that follows each group of matching
// `case` labels are absent from this listing (they were dropped by the listing generator).
// Judging by the call sites elsewhere in this file this is presumably
// gsc_helper_genotype_matrix_identify_cell_style, with one result per group of labels - confirm
// against the full source.
6150 switch (c.cell_len) {
6151 case 1:
// One-character cell: either a digit 0/1/2, or a single nucleotide / ambiguity letter.
6152 switch (c.cell[0]) {
6153 case '0':
6154 case '1':
6155 case '2':
6157 case 'G': // G
6158 case 'A': // A
6159 case 'T': // T
6160 case 'C': // C
6161 case 'R': // G/A
6162 case 'Y': // T/C
6163 case 'M': // A/C
6164 case 'K': // G/T
6165 case 'S': // G/C
6166 case 'W': // A/T
6167 case 'N': // any
6169 default:
6170 break;
6171 }
6172 break;
6173 case 2:
// Two-character cell: 'm' followed by a digit is singled out before the general two-character handling.
6174 if (c.cell[0] == 'm') { // m[numeric] case, which is probably a marker not an allele pair
6175 switch (c.cell[1]) {
6176 case '0':
6177 case '1':
6178 case '2':
6179 case '3':
6180 case '4':
6181 case '5':
6182 case '6':
6183 case '7':
6184 case '8':
6185 case '9':
6187 default:
6188 break;
6189 }
6190 }
6192 case 3:
// Three-character cell: only recognised when the middle character is a slash.
6193 if (c.cell[1] == '/') {
6195 }
6196 break;
6197 default:
// Any other length is not classified by this switch.
6198 break;
6199 }
6201}
6202
// Writes the two alleles of marker `markerix` for the genotype at `loc`, decoding them from the
// text of one file cell according to `style`. `forrng` supplies the random generator used to pick
// a phase when the cell does not say which allele sits on which strand.
// NOTE(review): the first line of the signature and the `case` labels of the outer switch are
// absent from this listing; the four branches below are described by what they do.
6209 GSC_GENOLEN_T markerix,
6210 enum gsc_GenotypeFileCellStyle style,
6211 char* cell,
6212 gsc_SimData* forrng) {
// Each marker occupies two consecutive chars in the genotype's allele string.
6213 char* pos = loc.localAM->alleles[loc.localPos] + 2*markerix;
6214 int phase = 0;
6215 switch (style) {
// Branch 1: the cell's first two characters are the two alleles, copied as-is.
6217 pos[0] = cell[0];
6218 pos[1] = cell[1];
6219 break;
// Branch 2: alleles are the first and third characters (the second character is skipped).
6221 pos[0] = cell[0];
6222 pos[1] = cell[2];
6223 break;
// Branch 3: a digit 0/1/2 is expanded to TT, one A plus one T in random order, or AA.
6225 switch (cell[0]) {
6226 case '0':
6227 pos[0] = 'T';
6228 pos[1] = 'T';
6229 break;
6230 case '1':
// phase is 0 or 1 and selects which of the two slots receives 'A'.
6231 phase = rnd_pcg_range(&forrng->rng,0,1);
6232 pos[phase] = 'A';
6233 pos[1-phase] = 'T';
6234 break;
6235 case '2':
6236 pos[0] = 'A';
6237 pos[1] = 'A';
6238 break;
6239 }
6240 break;
// Branch 4: a single letter is expanded to a pair; G/A/T/C give a homozygous pair, the other
// handled letters give the two-allele combination noted beside each case, in random order.
// Letters not listed (including 'N') leave the slot untouched.
6242 switch (cell[0]) {
6243 case 'G': // G
6244 pos[0] = 'G';
6245 pos[1] = 'G';
6246 break;
6247 case 'A': // A
6248 pos[0] = 'A';
6249 pos[1] = 'A';
6250 break;
6251 case 'T': // T
6252 pos[0] = 'T';
6253 pos[1] = 'T';
6254 break;
6255 case 'C': // C
6256 pos[0] = 'C';
6257 pos[1] = 'C';
6258 break;
6259 case 'R': // G/A
6260 phase = rnd_pcg_range(&forrng->rng,0,1);
6261 pos[phase] = 'G';
6262 pos[1-phase] = 'A';
6263 break;
6264 case 'Y': // T/C
6265 phase = rnd_pcg_range(&forrng->rng,0,1);
6266 pos[phase] = 'T';
6267 pos[1-phase] = 'C';
6268 break;
6269 case 'M': // A/C
6270 phase = rnd_pcg_range(&forrng->rng,0,1);
6271 pos[phase] = 'A';
6272 pos[1-phase] = 'C';
6273 break;
6274 case 'K': // G/T
6275 phase = rnd_pcg_range(&forrng->rng,0,1);
6276 pos[phase] = 'G';
6277 pos[1-phase] = 'T';
6278 break;
6279 case 'S': // G/C
6280 phase = rnd_pcg_range(&forrng->rng,0,1);
6281 pos[phase] = 'G';
6282 pos[1-phase] = 'C';
6283 break;
6284 case 'W': // A/T
6285 phase = rnd_pcg_range(&forrng->rng,0,1);
6286 pos[phase] = 'A';
6287 pos[1-phase] = 'T';
6288 break;
6289 default:
6290 break;
6291 }
6292 break;
// Any other style: nothing is written.
6293 default: break;
6294 }
6295}
6296
// Builds and returns (by value) a gsc_EmptyListNavigator positioned at slot 0, remembering the
// group new genotypes should be assigned to and a private copy of the simulation's current id.
// NOTE(review): the first signature line and the line that assigns `me.firstAM` are absent from
// this listing; the three arguments visible below (label count, label defaults, 0) are presumably
// the tail of a call that creates an empty AlleleMatrix - confirm against the full source.
6301 gsc_GroupNum allocation_group) {
6302 struct gsc_EmptyListNavigator me = { .d=d,
6303 .localPos = 0,
6304 .alloctogroup = allocation_group,
6305 .currentid = d->current_id };
6307 me.d->n_labels,
6308 me.d->label_defaults, 0);
// Start navigating from the head of the navigator's own list.
6309 me.localAM = me.firstAM;
6310 return me;
6311}
6312
// Rewinds the navigator `it` to slot 0 of its first AlleleMatrix and returns that location.
// If the first matrix holds no genotypes yet, slot 0 is created on the spot.
// NOTE(review): the signature line is absent from this listing; name inferred from call sites.
6318 it->localAM = it->firstAM;
6319 it->localPos = 0;
6320 if (1 > it->localAM->n_genotypes) {
// Lazily create the first genotype: zero-filled allele string (two chars per marker), no name,
// the navigator's target group, and the next id from the navigator's private counter.
6321 it->localAM->n_genotypes = 1;
6322 it->localAM->alleles[0] = gsc_malloc_wrap(sizeof(char) * (it->localAM->n_markers<<1),GSC_TRUE);
6323 memset(it->localAM->alleles[0], 0, sizeof(char) * (it->localAM->n_markers<<1));
6324 it->localAM->names[0] = NULL;
6325 it->localAM->groups[0] = it->alloctogroup;
// The counter is advanced before use, so the id handed out is the incremented value.
6326 ++(it->currentid.id);
6327 it->localAM->ids[0] = it->currentid;
6328 }
6329 return (gsc_GenoLocation){.localAM=it->localAM, .localPos =it->localPos};
6330}
6331
// Advances the navigator `it` by one slot and returns the new location, creating the slot (and,
// when the current AlleleMatrix is full, a further AlleleMatrix) if it does not exist yet.
// NOTE(review): the signature line and the line declaring/creating `next` are absent from this
// listing; the two arguments visible after the NULL check are presumably the tail of the call
// that creates the new empty AlleleMatrix - confirm against the full source.
6337 if (CONTIG_WIDTH - 1 == it->localPos) {
// Currently on the last slot of this matrix: move to slot 0 of the following matrix.
6338 if (NULL == it->localAM->next) {
6340 it->d->n_labels,
6341 it->d->label_defaults, 0);
6342 it->localAM->next = next;
6343 it->localAM = next;
6344 it->localPos = 0;
6345 } else {
6346 it->localAM = it->localAM->next;
6347 it->localPos = 0;
6348 }
6349 } else {
6350 ++(it->localPos);
6351 }
6352
6353 if (it->localAM->n_genotypes <= it->localPos) {
// The slot does not exist yet. Only appending directly after the last existing genotype is
// accepted; a larger gap is reported and INVALID_GENO_LOCATION is returned.
6354 if (1 < it->localPos - it->localAM->n_genotypes) {
6355 fprintf(stderr,"EmptyListNavigator invalid\n");
6356 return INVALID_GENO_LOCATION;
6357 }
6358 ++(it->localAM->n_genotypes);
6359
// Same initialisation as for the first slot: zeroed alleles, no name, target group, fresh id.
6360 it->localAM->alleles[it->localPos] = gsc_malloc_wrap(sizeof(char) * (it->localAM->n_markers<<1),GSC_TRUE);
6361 memset(it->localAM->alleles[it->localPos], 0, sizeof(char) * (it->localAM->n_markers<<1));
6362 it->localAM->names[it->localPos] = NULL;
6363 it->localAM->groups[it->localPos] = it->alloctogroup;
6364 ++(it->currentid.id);
6365 it->localAM->ids[it->localPos] = it->currentid;
6366 }
6367
6368 return (gsc_GenoLocation){.localAM=it->localAM, .localPos =it->localPos};
6369}
6370
// Attaches the navigator's AlleleMatrix list to the simulation's list (it->d->m) and writes the
// navigator's id counter back to the simulation.
// NOTE(review): the signature line and one statement after the append (listing line 6384) are
// absent from this listing; the name used for this block is inferred - confirm.
6376 if (NULL == it->d->m) {
// The simulation has no genotypes yet: the navigator's list becomes the whole list.
6377 it->d->m = it->firstAM;
6378 } else {
// Otherwise walk to the last AlleleMatrix and append the navigator's list there.
6379 gsc_AlleleMatrix* listend = it->d->m;
6380 while (NULL != listend->next) {
6381 listend = listend->next;
6382 }
6383 listend->next = it->firstAM;
6385 }
// Publish the ids consumed while filling the list.
6386 it->d->current_id = it->currentid;
6387}
6388
// Decides whether genetic markers run along rows or along columns of a genotype matrix file and
// returns `format` with markers_as_rows (and possibly has_header) filled in.
//   d              - simulation whose loaded marker names are used for matching
//   cellqueue      - the cells read so far from the start of the file
//   firstrowlen    - number of cells in the first row
//   queuelen       - number of cells in cellqueue
//   format         - what is already known; returned (modified copy) as the result
//   filenameforlog - only used in log messages
// NOTE(review): the first signature line is absent from this listing; name taken from the call site.
6416 const SimData* d,
6417 const gsc_TableFileCell* cellqueue,
6418 const size_t firstrowlen,
6419 const size_t queuelen,
6420 struct gsc_GenotypeFile_MatrixFormat format,
6421 const char* filenameforlog) {
6422
6423 if (format.markers_as_rows == GSC_TRUE || format.markers_as_rows == GSC_FALSE) {
6424 // pass
6425 } else if (d->genome.n_maps == 0) {
6426 // If there is no genetic map, we cannot check the row/column headers to see if any of them match the marker names..
6427 // Default to markers being rows
6428
6429 printf("(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns| (by assumption when no genetic map is loaded)\n", filenameforlog);
6430 printf("(Loading %s) No genetic map is loaded, will invent a map with equal spacing of these genetic markers (1cM apart)\n", filenameforlog);
6431 format.markers_as_rows = GSC_TRUE;
6432
6433 } else if (format.has_header == GSC_FALSE) {
6434 printf("(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns| "
6435 "(by assumption when matrix has no header row)\n", filenameforlog);
6436 format.markers_as_rows = GSC_TRUE;
6437
6438 } else {
6439 // Note: by here, either the user has told us there is a header row, or we get to detect whether there is one. So will investigate it by comparing names to what's in our map
6440 // taken from older function gsc_helper_genotypefile_matrix_check_markers_are_rows
// firstsafeheaderindex: index of the first header cell that cannot be a corner cell, or -1 when
// the first row gives nothing to inspect.
6441 int firstsafeheaderindex = -1;
6442 if (firstrowlen > 1) {
6443 firstsafeheaderindex = 1;
6444 } else if (firstrowlen == 1 && queuelen > firstrowlen + 1) { // second row has more than one cell read.
6445 firstsafeheaderindex = 0; // assume there's no corner cell
6446 format.has_header = GSC_TRUE;
6447 }
6448
6449 if (firstsafeheaderindex >= 0) {
6450 // Don't check the "first" cell. It might be a corner cell between the two headers, whose value should be ignored
6451 // Check the next cell in the first row.
6452 if (gsc_get_index_of_genetic_marker(cellqueue[firstsafeheaderindex].cell, d->genome, NULL)) {
6453 printf("(Loading %s) Format axis: genetic markers are |columns|, founder lines are -rows-\n", filenameforlog);
6454 format.markers_as_rows = GSC_FALSE;
6455 format.has_header = GSC_TRUE;
6456 return format;
6457 }
6458
6459 // If that wasn't a match, check the first row header, if it exists:
6460 if (queuelen > firstrowlen && !cellqueue[firstrowlen].eof &&
6461 gsc_get_index_of_genetic_marker(cellqueue[firstrowlen].cell, d->genome, NULL)) {
6462 printf("(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns|\n", filenameforlog);
6463 format.markers_as_rows = GSC_TRUE;
6464 return format;
6465 }
6466
6467 // Check remaining column headers
6468 for (size_t i = firstsafeheaderindex + 1; i < firstrowlen; ++i) {
6469 if (gsc_get_index_of_genetic_marker(cellqueue[i].cell, d->genome, NULL)) {
6470 printf("(Loading %s) Format axis: genetic markers are |columns|, founder lines are -rows-\n", filenameforlog);
6471 format.markers_as_rows = GSC_FALSE;
6472 format.has_header = GSC_TRUE;
6473 return format;
6474 }
6475 }
6476
6477 }
// No inspected cell matched a known marker name: fall back to markers-as-rows.
6478 printf("(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns| (by default file format)\n", filenameforlog);
6479 format.markers_as_rows = GSC_TRUE;
6480 }
6481 return format;
6482
6483}
6484
// Fills in format.cell_style when the caller left it as GSC_GENOTYPECELLSTYLE_UNKNOWN, by
// classifying one body cell from the cells read so far, and logs what was detected.
// A style supplied by the caller is returned untouched and nothing is logged.
// NOTE(review): the first signature line, the detection statement for the single-row case
// (listing line 6513) and one `case` label in the logging switch (listing line 6535) are absent
// from this listing.
6497 const gsc_TableFileCell* cellqueue,
6498 const size_t firstrowlen,
6499 const size_t queuelen,
6500 struct gsc_GenotypeFile_MatrixFormat format,
6501 const char* filenameforlog) {
6502
// style_detected: detection was attempted here, so a log line is owed.
// single_col_file: there was no body cell to look at.
6503 _Bool style_detected = 0;
6504 _Bool single_col_file = 0;
6505 // 1. Detect format if not yet provided.
6506 if (format.cell_style == GSC_GENOTYPECELLSTYLE_UNKNOWN) {
6507 style_detected = 1;
6508
6509 if (firstrowlen == queuelen || cellqueue[firstrowlen].eof) { // There is only one row. Short-circuiting necessary
6510 // if there is also only one column, we have no body cells to detect the style of
6511 if (firstrowlen > 1) {
6512 // Detection path for a single-line file. If it has a header, then this value might end up ignored
6514 } else {
6515 single_col_file = 1; // one-cell file. needs the warning.
6516 }
6517 } else { // there is more than one row
6518 // If there is only one column, there are no body cells with style to detect
6519 if (firstrowlen + 1 < queuelen && cellqueue[firstrowlen+1].predNewline < 1) {
6520 // Detection path. There exists a second cell on the second line that we can read
6521 format.cell_style = gsc_helper_genotype_matrix_identify_cell_style(cellqueue[firstrowlen+1]);
6522 } else {
6523 single_col_file = 1;
6524 }
6525 }
6526 }
6527
6528 // 2. Print cell style detection logs
6529 if (style_detected) {
6530 switch(format.cell_style) {
6531 case GSC_GENOTYPECELLSTYLE_PAIR: printf("(Loading %s) Allele format: phased allele pairs\n", filenameforlog); break;
6532 case GSC_GENOTYPECELLSTYLE_SLASHPAIR: printf("(Loading %s) Allele format: phased allele pairs (slash-separated)\n", filenameforlog); break;
6533 case GSC_GENOTYPECELLSTYLE_COUNT: printf("(Loading %s) Allele format: reference allele counts (phase will be randomised)\n", filenameforlog); break;
6534 case GSC_GENOTYPECELLSTYLE_ENCODED: printf("(Loading %s) Allele format: IUPAC encoded pair (phase will be randomised)\n", filenameforlog); break;
// Remaining case (label not shown): an empty matrix only earns a warning on stdout, whereas a
// non-empty matrix whose style could not be recognised is reported as a failure on stderr.
6536 if (single_col_file || firstrowlen == queuelen ||
6537 (firstrowlen + 1 == queuelen && cellqueue[firstrowlen].eof && cellqueue[firstrowlen].cell_len == 0)) {
6538 printf("(Loading %s) Warning: empty genotype matrix. No genotypes will be loaded.\n", filenameforlog);
6539 } else {
6540 fprintf(stderr,"(Loading %s) Failure: Unable to determine the formatting of pairs of alleles."
6541 " Check genomicSimulation manual for accepted allele pair encodings\n", filenameforlog);
6542 }
6543 }
6544 }
6545
6546 return format;
6547}
6548
// Decides whether the first row of a genotype matrix is a header row, unless the caller already
// said so, and returns `format` with has_header filled in (GSC_TRUE / GSC_FALSE), or left
// undetermined when no conclusion can be drawn. The outcome of a detection attempt is logged.
// NOTE(review): the first signature line is absent from this listing; name taken from the call site.
6577 const gsc_TableFileCell* cellqueue,
6578 const size_t firstrowlen,
6579 const size_t queuelen,
6580 struct gsc_GenotypeFile_MatrixFormat format,
6581 const char* filenameforlog) {
6582 // Validity check: if genetic markers are columns, header row is mandatory
6583 if (format.has_header == GSC_FALSE && format.markers_as_rows == GSC_FALSE) {
6584 printf("(Loading %s) Failure: genetic markers cannot be represented by columns when matrix has no header row\n", filenameforlog);
// Signal the contradiction to the caller by making has_header undetermined.
6585 format.has_header = GSC_NA;
6586 return format;
6587 }
6588
6589 // Detect header if we need to detect it.
6590 if (format.has_header != GSC_FALSE && format.has_header != GSC_TRUE) {
6591 if (firstrowlen == 1) {
6592 // we could have a single-column file (no header assumed), or
6593 // we could be a two-column file with no corner cell (must have a header)
6594 if (queuelen > 2) {
// cellqueue[2] is the cell after the first cell of the second row: if it starts a new line (or is
// the end of file) the second row also had a single cell.
6595 if (cellqueue[2].eof || cellqueue[2].predNewline) {
6596 format.has_header = GSC_FALSE; // single column file
6597 } else {
6598 format.has_header = GSC_TRUE;
6599 }
6600 } // else can't draw any conclusions.
6601
6602 } else if (format.cell_style != GSC_GENOTYPECELLSTYLE_UNKNOWN) {
6603 // Idea: if we find a cell in the first row that doesn't match the expected cell style, then that first row is probably a header
6604 format.has_header = GSC_FALSE;
6605 for (size_t i = 1; i < firstrowlen; ++i) { // ignore first cell in row, it could be a corner cell or row header
6606 if (gsc_helper_genotype_matrix_identify_cell_style(cellqueue[i]) != format.cell_style) {
6607 format.has_header = GSC_TRUE;
6608 break;
6609 }
6610 }
6611 } // else don't know how to detect.
6612
6613 switch (format.has_header) {
6614 case GSC_FALSE: printf("(Loading %s) Format: genotype matrix without header row\n", filenameforlog); break;
6615 case GSC_TRUE: printf("(Loading %s) Format: genotype matrix with header row\n", filenameforlog); break;
6616 default: fprintf(stderr,"(Loading %s) Failure: Unable to determine whether file has header row\n", filenameforlog); break;
6617 }
6618 }
6619
6620 return format;
6621}
6622
// Infers whether the header row begins with a corner cell by comparing the number of cells in
// the first and second rows. `secondrowheaderisempty` tells whether the second row's own header
// (the genotype name) was empty, which makes that row appear one cell shorter than it is.
// Returns GSC_TRUE (corner cell present), GSC_FALSE (absent) or GSC_NA (row lengths do not fit
// either layout).
// NOTE(review): the first signature line is absent from this listing; name taken from the call site.
6639 const size_t ncellsfirstrow,
6640 const size_t ncellssecondrow,
6641 const _Bool secondrowheaderisempty) {
// Second row is one longer than the header: the header has no cell above the row names.
6642 if (ncellssecondrow == ncellsfirstrow + 1) {
6643 return GSC_FALSE;
6644 } else if (ncellssecondrow == ncellsfirstrow) {
6645 if (secondrowheaderisempty) {
6646 return GSC_FALSE; //genotype name is simply empty, making the second row look one column shorter than reality
6647 } else {
6648 return GSC_TRUE;
6649 }
6650 } else if (ncellssecondrow == ncellsfirstrow - 1 && secondrowheaderisempty) {
6651 return GSC_TRUE; // genotype name on row 2 is empty but corner cell is not
6652 } else {
6653 return GSC_NA;
6654 }
6655}
6656
// Tail of a compound-literal return that wraps the three matrix-format settings (cell style,
// header presence, orientation) into the `spec` member of a format specification.
// NOTE(review): the signature and the opening of the return statement are absent from this
// listing; the block name is inferred - confirm against the full source.
6686 .spec={(struct gsc_GenotypeFile_MatrixFormat){.cell_style=cell_style,
6687 .has_header=has_header,
6688 .markers_as_rows=markers_as_rows}}};
6689}
6690
// Loads genotypes from a matrix-style text file into the simulation and returns the group the
// new genotypes were placed in, or NO_GROUP when nothing could be loaded.
// Work is done in four parts: (1) detect orientation / cell style / header from the first cells,
// (2) invent a marker map if the simulation has none, (3) stream the remaining cells into a new
// AlleleMatrix list, (4) attach the list and release temporary buffers.
// NOTE(review): this listing omits a number of lines of this function (among them the first
// signature line, the creation of the table reader `tbl` and of the navigator `it`, and the
// statements that declare `cell`, `loc` and `group`). Comments below describe only what the
// visible lines do.
6707 const char* filename,
6708 const gsc_FileFormatSpec format) {
6709 if (filename == NULL) return NO_GROUP;
6711 fprintf(stderr,"Non-genotype-matrix format specification provided to genotype matrix file loader function\n");
6712 return NO_GROUP;
6713 }
6714
6715 // Part 1: Detect file formatting details
// Start from "everything unknown" and overwrite with the caller's spec when one was given.
6716 struct gsc_GenotypeFile_MatrixFormat format_detected =
6717 { .has_header = GSC_NA, .markers_as_rows = GSC_NA, .cell_style = GSC_GENOTYPECELLSTYLE_UNKNOWN };
6718 if (format.filetype == GSC_GENOTYPEFILE_MATRIX) {
6719 format_detected = format.spec.matrix;
6720 }
6721 size_t queuesize = 0;
6722
6724 // Read one row + 2 cells (if possible)
6725 GSC_CREATE_BUFFER(cellsread,gsc_TableFileCell,100);
6726 size_t ncellsread = 0;
// Cells are deep-copied as they are read so their text outlives the reader's internal buffer.
// The loop stops on the first cell that begins a new line (i.e. the first cell of row two) or at eof.
6727 do {
6728 cellsread[ncellsread] = gsc_tablefilereader_get_next_cell(&tbl);
6729 gsc_tablefilecell_deep_copy(&cellsread[ncellsread]);
6730 ++ncellsread;
6731 if (ncellsread >= cellsreadcap) {
6732 GSC_STRETCH_BUFFER(cellsread,2*ncellsread);
6733 }
6734 } while (!cellsread[ncellsread-1].eof && (ncellsread <= 1 || !cellsread[ncellsread-1].predNewline));
// The last cell read belongs to the first row only if it is a non-empty cell at eof.
6735 size_t ncellsfirstrow = (cellsread[ncellsread-1].eof && cellsread[ncellsread-1].cell_len > 0) ? ncellsread : ncellsread - 1;
6736 if (!cellsread[ncellsread-1].eof) { // read one more cell if possible
6737 cellsread[ncellsread] = gsc_tablefilereader_get_next_cell(&tbl);
6738 gsc_tablefilecell_deep_copy(&cellsread[ncellsread]);
6739 ++ncellsread;
6740 if (ncellsread >= cellsreadcap) {
6741 GSC_STRETCH_BUFFER(cellsread,2*ncellsread);
6742 }
6743 }
6744 queuesize = ncellsread; // so that we know how many to free if we failure_exit
6745 if (ncellsread <= 1) { // file is an EOF only
6746 goto failure_exit;
6747 }
6748 //int is_onecol_file = cellsread[ncellsfirstrow + 1].predNewline > 0 || ncellsread == 2; // ncellsread == 2 means we read one cell, then an EOF
6749 int is_onerow_file = ncellsread == ncellsfirstrow || cellsread[ncellsfirstrow].eof; // short-circuiting essential!
6750
// Run the three detectors in order; each fills in one field of format_detected.
6751 format_detected = gsc_helper_genotypefile_matrix_detect_orientation(d, cellsread, ncellsfirstrow, ncellsread, format_detected, filename);
6752 format_detected = gsc_helper_genotypefile_matrix_detect_cellstyle(cellsread, ncellsfirstrow, ncellsread, format_detected, filename);
6753 format_detected = gsc_helper_genotypefile_matrix_detect_header(cellsread, ncellsfirstrow, ncellsread, format_detected, filename);
// Give up if any of the three settings is still undetermined.
6754 if ((format_detected.has_header != GSC_FALSE && format_detected.has_header != GSC_TRUE) ||
6755 (format_detected.markers_as_rows != GSC_FALSE && format_detected.markers_as_rows != GSC_TRUE) ||
6756 format_detected.cell_style == GSC_GENOTYPECELLSTYLE_UNKNOWN) {
6757 goto failure_exit;
6758 }
6759
6760 GSC_LOGICVAL format_has_corner_cell = GSC_NA;
6761 // If markers as columns, we do need to know how many cells are in the second row in order to detect a corner cell
6762 if (!format_detected.markers_as_rows && !is_onerow_file) {
6763 // Read rest of second row
6764 while (!cellsread[ncellsread-1].eof && !cellsread[ncellsread-1].predNewline) {
6765 cellsread[ncellsread] = gsc_tablefilereader_get_next_cell(&tbl);
6766 gsc_tablefilecell_deep_copy(&cellsread[ncellsread]);
6767 ++ncellsread;
6768 if (ncellsread >= cellsreadcap) {
6769 GSC_STRETCH_BUFFER(cellsread,2*ncellsread);
6770 }
6771 }
6772 // Detect corner cell
6773 queuesize = ncellsread; // so that we know how many to free if we failure_exit
// The final cell read is the one that ended the second row (start of row three, or eof), hence -1.
6774 size_t ncellssecondrow = ncellsread - ncellsfirstrow - 1;
6775 format_has_corner_cell = gsc_helper_genotypefile_matrix_detect_cornercell_presence(ncellsfirstrow, ncellssecondrow, cellsread[ncellsfirstrow].predCol > 0);
6776 if (format_has_corner_cell == GSC_NA) {
6777 fprintf(stderr, "(Loading %s) Failure: Header row length and second row length do not align\n", filename);
6778 goto failure_exit;
6779 }
6780 }
6781
6782 // Create the queue of cells to parse (exclude header from this queue, because it needs to be dealt with differently)
6783 gsc_TableFileCell* cellqueue = cellsread;
6784 //queuesize = ncellsread; (already done above)
6785 if (format_detected.has_header) {
6786 cellqueue = cellsread + ncellsfirstrow;
6787 queuesize = ncellsread - ncellsfirstrow;
6788 }
6789
6790 // PART 2: Create uniform-spaced map, if we have no map currently
6791 _Bool build_map_from_rows = 0;
6792 if (d->genome.n_markers == 0) {
6793 if (format_detected.markers_as_rows) {
6794 build_map_from_rows = 1;
6795 // We're going to have to do an independent read of the file to extract these. Will be a bit slower.
// Count rows by counting cells that begin a new line; a headerless file's first row counts too.
6798 GSC_GENOLEN_T nmarkersread = format_detected.has_header ? 0 : 1;
6799 do {
6801 if (cell.predNewline) { ++nmarkersread; }
6802 } while (!cell.eof);
6804 if (cell.predNewline) { // there's a newline before eof, so no real actual last row
6805 --nmarkersread;
6806 }
6807
6808 d->genome.n_markers = nmarkersread;
6812
6813 } else { // markers as columns
6814 if (!format_detected.has_header) { // you should not be able to get here. // assert(format_detected.has_header == GSC_TRUE);
6815 fprintf(stderr, "(Loading %s) Failure: Genotype matrix with markers as columns but no header row is an unsupported file type (there is no way to tell which column is which marker)\n", filename);
6816 goto failure_exit;
6817 }
6818
6819 size_t i = format_has_corner_cell ? 1 : 0; // starting index for iterating through names
6820 d->genome.n_markers = ncellsfirstrow - i;
// The header cells' text is adopted as the marker names; marking the cell shallow stops the
// clean-up code from freeing text that the genome now refers to.
6823 for (size_t j = 0; j < d->genome.n_markers; ++i, ++j) {
6824 //gsc_tablefilecell_deep_copy(&cellqueue[i]); // already deep copied
6825 d->genome.marker_names[j] = cellsread[i].cell;
6826 cellsread[i].isCellShallow = GSC_TRUE; // prevent deletion
6828 }
6830 gsc_create_uniformspaced_recombmap(d,0,NULL,d->genome.n_markers); // create based on the markers we've saved in 'genome'
6831 }
6832 }
6833
6834 // PART 3: Parse file into an AlleleMatrix
6835
// nvalidmarker counts rows/columns whose name matched a known marker; it is reported in the final log.
6838 GSC_GENOLEN_T nvalidmarker = 0;
6839 size_t n_cols = 0;
6840 if (format_detected.markers_as_rows) {
6841
6842 gsc_GenoLocation loc;
6843 gsc_TableFileCell ncell;
6844 n_cols = (format_detected.has_header) ? ncellsfirstrow + 1 : ncellsfirstrow; // assume first row has no corner cell for now
6845 _Bool first = 1;
6846 _Bool have_valid_marker = 0; GSC_GENOLEN_T markerix;
6847 GSC_GLOBALX_T column = 0;
6848 size_t row = 0;
6849 do {
// Cells come first from the already-read queue, then from the file.
6850 ncell = gsc_helper_tablefilereader_get_next_cell_wqueue(&tbl,&cellqueue,&queuesize);
6851
6852 if (ncell.cell != NULL) {
6853 if (ncell.predNewline || first) {
// First cell of a row: it names the marker. Terminate the text temporarily for the name lookup.
6854 char tmp = ncell.cell[ncell.cell_len]; ncell.cell[ncell.cell_len] = '\0';
6855
6856 if (build_map_from_rows) {
// Building the map from this file: every row is accepted and rows are numbered in file order.
6857 have_valid_marker = 1;
6858 if (first) {
6859 markerix = 0;
6860 } else {
6861 markerix++;
6862 }
6864 d->genome.marker_names[markerix] = ncell.cell;
6865 ncell.isCellShallow = GSC_TRUE; // prevent deletion
6866 } else {
6867 have_valid_marker = gsc_get_index_of_genetic_marker(ncell.cell, d->genome, &markerix);
6868 }
6869
6870 nvalidmarker += have_valid_marker;
6871 ncell.cell[ncell.cell_len] = tmp;
6872 // Then, after reading first row, detect what our expected row length is, if defaults don't suit.
6873 if (row == 1 && format_detected.has_header) {
6874 if (column + 1 != ncellsfirstrow && column + 1 != ncellsfirstrow + 1) {
6875 fprintf(stderr, "(Loading %s) Failure: Header row length and second row length do not align\n", filename);
6876 goto failure_exit;
6877 } else {
6878 n_cols = column + 1;
6879 }
6880 }
6881 first = 0;
6882 column = 0;
6883 ++row;
6884
6885 } else if (ncell.predCol) { // any number of column spacers treated as one column gap when reading a genotype matrix
// Body cell: column k of every row belongs to the k-th genotype, so the navigator is rewound at
// column 1 and stepped for each further column.
6886 ++column;
6887 if (have_valid_marker && column < n_cols) {
6888 loc = (1 == column) ? gsc_emptylistnavigator_get_first(&it) : gsc_emptylistnavigator_get_next(&it);
6889 gsc_helper_genotypecell_to_allelematrix(loc,markerix,format_detected.cell_style,ncell.cell,d);
6890 } // Note we ignore all extra cells in all rows
6891 }
6892 }
6893
6894 if (!ncell.isCellShallow) { GSC_FREE(ncell.cell); }
6895 } while (!ncell.eof);
// Same row-length check as inside the loop, for a file whose only body row ended at eof.
6896 if (row == 1 && format_detected.has_header) {
6897 if (column + 1 != ncellsfirstrow && column + 1 != ncellsfirstrow + 1) {
6898 fprintf(stderr, "(Loading %s) Failure: Header row length and second row length do not align\n", filename);
6899 goto failure_exit;
6900 } else {
6901 n_cols = column + 1;
6902 }
6903 }
6904
6905 // Then save the genotype names
6906 if (format_detected.has_header) {
6907 format_has_corner_cell = gsc_helper_genotypefile_matrix_detect_cornercell_presence(ncellsfirstrow, n_cols, cellsread[ncellsfirstrow].predCol > 0);
6908 size_t i = format_has_corner_cell ? 1 : 0;
6909 gsc_GenoLocation loc;
6910 for (size_t j = 0; i < ncellsfirstrow; ++i, ++j) {
6912 // assert(!cellsread[i].isShallowCopy);
6913 gsc_set_name(loc,cellsread[i].cell); // using names here so no need to free them. Since they're in cellsread
6914 cellsread[i].isCellShallow = GSC_TRUE; // prevent deletion
6915 }
6916 }
6917
6918 // Then finalise the map, if we're creating one:
6919 if (build_map_from_rows) {
6920 for (GSC_GENOLEN_T i = 0; i < d->genome.n_markers; ++i) {
6922 }
6924 }
6925
6926 } else { // markers as columns
6927 if (!format_detected.has_header) { // you should not be able to get here.
6928 fprintf(stderr, "(Loading %s) Failure: Genotype matrix with markers as columns but no header row is an unsupported file type (there is no way to tell which column is which marker)\n", filename);
6929 goto failure_exit;
6930 }
6931
6932 // Identify the marker corresponding to each column
6933 size_t i = format_has_corner_cell ? 1 : 0;
6934 size_t n_col = ncellsfirstrow + (1-i);
6935 GSC_GENOLEN_T* markerixs = gsc_malloc_wrap(sizeof(*markerixs)*ncellsfirstrow,GSC_TRUE);
// Pre-fill each entry with n_markers, which the body loop below treats as "column not matched".
6936 for (GSC_GENOLEN_T j = 0; i < ncellsfirstrow; ++i, ++j) {
6937 markerixs[j] = d->genome.n_markers;
6938 nvalidmarker += gsc_get_index_of_genetic_marker(cellsread[i].cell, d->genome, &markerixs[j]);
6939 }
6940
6941 // Read the table
6942 _Bool first = 1;
6943 GSC_GLOBALX_T row = 0;
6944 size_t column = 0; // we count column numbers from 1 for the first body cell. sorry for the inconsistency with the branch of the if statement above.
6946 gsc_TableFileCell ncell;
6947 do {
6948 ncell = gsc_helper_tablefilereader_get_next_cell_wqueue(&tbl,&cellqueue,&queuesize);
6949
6950 if (ncell.cell != NULL) {
6951 if (ncell.predNewline) {
// A new line starts a new genotype; its first cell is the genotype's name unless the line
// begins with a column separator.
6953 first = 0;
6954
6955 ++row;
6956 column = 0;
6957 if (ncell.predCol) { // missing name.
6958 gsc_set_name(loc,NULL);
6959 } else {
6961 gsc_set_name(loc,ncell.cell);
6962 ncell.isCellShallow = GSC_TRUE; // so it does not get deleted
6963 }
6964 }
6965
6966 if (ncell.predCol) {
6967 ++column;
// Only columns within the header's width that matched a known marker are stored.
6968 if (column < n_col && markerixs[column-1] < d->genome.n_markers) {
6969 gsc_helper_genotypecell_to_allelematrix(loc,markerixs[column-1],format_detected.cell_style,ncell.cell,d);
6970 }
6971 }
6972 }
6973
6974 if (!ncell.isCellShallow) { GSC_FREE(ncell.cell); }
6975 } while (!ncell.eof);
6976
6977 GSC_FREE(markerixs);
6978
6979 }
6980
6981 // PART 4: Tidy and clean and exit
// Count the genotypes created across the whole navigator list for the summary log.
6982 GSC_GLOBALX_T ngenos = 0;
6983 AlleleMatrix* tmpam = it.firstAM;
6984 do {
6985 ngenos += tmpam->n_genotypes;
6986 } while ((tmpam = tmpam->next) != NULL);
6987 printf("(Loading %s) %lu genotype(s) of %lu marker(s) were loaded.\n", filename,
6988 (long unsigned int) ngenos, (long unsigned int) nvalidmarker);
6989 if (ngenos == 0) {
6991 goto failure_exit;
6992 }
6994 ++d->n_groups;
6995
6996 // ... cleaning up the header row
// Header cells whose text was adopted as a name were marked shallow and are skipped here.
6997 if (format_detected.has_header) {
6998 for (size_t j = 0; j < ncellsfirstrow; ++j) {
6999 if (!cellsread[j].isCellShallow) { GSC_FREE(cellsread[j].cell); }
7000 }
7001 }
7002 GSC_DELETE_BUFFER(cellsread);
7004 return group;
7005
7006 failure_exit:
7007 // Clean up structures and return, having loaded no genotypes
7008 // ... cleaning up unprocessed cells in the queue
// The unprocessed cells are the last `queuesize` entries of cellsread; freed cells are marked
// shallow so the header clean-up below cannot free them a second time.
7009 for (size_t i = 1; i <= queuesize; ++i) {
7010 if (!cellsread[ncellsread-i].isCellShallow) {
7011 GSC_FREE(cellsread[ncellsread-i].cell);
7012 cellsread[ncellsread-i].isCellShallow = GSC_TRUE;
7013 }
7014 }
7015 // ... cleaning up the header row
7016 if (format_detected.has_header) {
7017 for (size_t j = 0; j < ncellsfirstrow; ++j) {
7018 if (!cellsread[j].isCellShallow) { GSC_FREE(cellsread[j].cell); }
7019 }
7020 }
7021 GSC_DELETE_BUFFER(cellsread);
7023 return NO_GROUP;
7024}
7025
// Convenience wrapper: loads only a genotype file (no map file, no effect file) through
// gsc_load_data_files and returns the `group` member of its result.
// NOTE(review): the first signature line is absent from this listing; the block name is inferred.
7044 const char* filename,
7045 const gsc_FileFormatSpec format) {
7046 return gsc_load_data_files(d,filename,NULL,NULL,format).group;
7047}
7048
// Loads a map file, a genotype file and an effect file in one call and returns the identifiers
// of what was loaded (group, map, effect set), each defaulting to its "none" value.
// When the caller did not state the genotype file type, it is guessed from the file suffix.
// NOTE(review): the first signature line, one line inside the suffix check (listing line 7076)
// and the `case` labels of the switch are absent from this listing.
7068 const char* genotype_file,
7069 const char* map_file,
7070 const char* effect_file,
7071 const gsc_FileFormatSpec format) {
7072 // Parse file suffix for file type, if it was not already provided
7073 enum gsc_GenotypeFileType type = format.filetype;
7074
7075 if (type == GSC_GENOTYPEFILE_UNKNOWN) {
// Only the text after the last '.' is considered; an unrecognised suffix leaves the type unknown.
7077 char* suffix = strrchr(genotype_file,'.');
7078 if (suffix != NULL) {
7079 if (strcmp(suffix,".bed") == 0) {
7080 type = GSC_GENOTYPEFILE_BED;
7081 } else if (strcmp(suffix,".ped") == 0) {
7082 type = GSC_GENOTYPEFILE_PED;
7083 } else if (strcmp(suffix,".vcf") == 0) {
7084 type = GSC_GENOTYPEFILE_VCF;
7085 }
7086 }
7087 }
7088
7089 struct gsc_MultiIDSet out = { .group=NO_GROUP, .map=NO_MAP, .effSet=NO_EFFECTSET };
7090
7091 switch (type) {
// The three branches below only report that the format is unsupported; nothing is loaded and
// `out` keeps its "none" values.
7093 //if (detectedtype) { printf("Will attempt to parse %s as a plink .bed file\n", filename); }
7094 fprintf(stderr,"plink .bed file parsing not yet implemented\n");
7095 break;
7097 fprintf(stderr,"plink .ped file parsing not yet implemented\n");
7098 break;
7100 fprintf(stderr,"vcf file parsing not yet implemented\n");
7101 break;
7102 default:
7103 //printf("(Loading files) Will treat %s as a genotype matrix (see genomicSimulation's default input file types)\n", genotype_file);
// Order matters: the map is loaded before the genotypes, and the effect file last.
7104 out.map = gsc_load_mapfile(d, map_file);
7105 out.group = gsc_load_genotypefile_matrix(d, genotype_file, format);
7106 out.effSet = gsc_load_effectfile(d, effect_file);
7107 }
7108
7109 return out;
7110}
7111
7112/*--------------------------Recombination counts-----------------------------*/
7113
7148int* gsc_calculate_min_recombinations_fw1(gsc_SimData* d, gsc_MapID mapid, char* parent1, unsigned int p1num, char* parent2,
7149 unsigned int p2num, char* offspring, int certain) {
7150 if (d->genome.n_maps < 1) {
7151 fprintf(stderr,"Need at least one recombination map loaded to estimate recombinations\n");
7152 return NULL;
7153 }
7154 int mapix = 0;
7155 if (mapid.id != NO_MAP.id) { mapix = gsc_get_index_of_map(d, mapid); }
7156 if (mapix >= d->genome.n_maps) {
7157 fprintf(stderr,"We don't have that recombination maps loaded\n");
7158 return NULL;
7159 }
7160 gsc_RecombinationMap map = d->genome.maps[mapix];
7161
7162 int* origins = gsc_malloc_wrap(sizeof(int) * d->genome.n_markers,GSC_TRUE);
7163 memset(origins,0,sizeof(*origins)*d->genome.n_markers);
7164 int p1match, p2match;
7165 int previous = 0;
7166
7167
7168 for (int chr = 0; chr < map.n_chr; ++chr) {
7169 //RPACKINSERT R_CheckUserInterrupt();
7170
7171 switch (map.chrs[chr].type) {
7172 case GSC_LINKAGEGROUP_SIMPLE:
7173 for (int i = 0; i < map.chrs[chr].map.simple.n_markers; ++i) {
7174 p1match = gsc_has_same_alleles(parent1, offspring, i);
7175 p2match = gsc_has_same_alleles(parent2, offspring, i);
7176 if (p1match && !p2match) {
7177 origins[map.chrs[chr].map.simple.first_marker_index + i] = p1num;
7178 previous = p1num;
7179 } else if (p2match && !p1match) {
7180 origins[map.chrs[chr].map.simple.first_marker_index + i] = p2num;
7181 previous = p2num;
7182 } else {
7183 if (certain) {
7184 origins[map.chrs[chr].map.simple.first_marker_index + i] = 0;
7185 } else {
7186 origins[map.chrs[chr].map.simple.first_marker_index + i] = previous;
7187 }
7188 }
7189 }
7190 break;
7191
7192 case GSC_LINKAGEGROUP_REORDER:
7193 for (int i = 0; i < map.chrs[chr].map.reorder.n_markers; ++i) {
7194 p1match = gsc_has_same_alleles(parent1, offspring, i);
7195 p2match = gsc_has_same_alleles(parent2, offspring, i);
7196 if (p1match && !p2match) {
7197 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = p1num;
7198 previous = p1num;
7199 } else if (p2match && !p1match) {
7200 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = p2num;
7201 previous = p2num;
7202 } else {
7203 if (certain) {
7204 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7205 } else {
7206 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = previous;
7207 }
7208 }
7209 }
7210 break;
7211 }
7212
7213 }
7214 return origins;
7215}
7216
7257int* gsc_calculate_min_recombinations_fwn(gsc_SimData* d, gsc_MapID mapid, char* parent1, unsigned int p1num, char* parent2,
7258 unsigned int p2num, char* offspring, int window_size, int certain) {
7259 if (d->genome.n_maps < 1) {
7260 fprintf(stderr,"Need at least one recombination map loaded to estimate recombinations\n");
7261 return NULL;
7262 }
7263 int mapix = 0;
7264 if (mapid.id != NO_MAP.id) { mapix = gsc_get_index_of_map(d, mapid); }
7265 if (mapix >= d->genome.n_maps) {
7266 fprintf(stderr,"We don't have that recombination maps loaded\n");
7267 return NULL;
7268 }
7269 gsc_RecombinationMap map = d->genome.maps[mapix];
7270
7271
7272 int* origins = gsc_malloc_wrap(sizeof(int) * d->genome.n_markers,GSC_TRUE);
7273 memset(origins,0,sizeof(*origins)*d->genome.n_markers);
7274 int p1match, p2match;
7275 int previous = 0, window_range = (window_size - 1)/2, i;
7276
7277 for (int chr = 0; chr < map.n_chr; ++chr) {
7278 //RPACKINSERT R_CheckUserInterrupt();
7279
7280 switch (map.chrs[chr].type) {
7281 case GSC_LINKAGEGROUP_SIMPLE:
7282 for (i = 0; i < window_range; ++i) {
7283 origins[map.chrs[chr].map.simple.first_marker_index + i] = 0;
7284 }
7285 for (; i < map.chrs[chr].map.simple.n_markers - window_range; ++i) {
7286 p1match = gsc_has_same_alleles_window(parent1, offspring, i, window_size);
7287 p2match = gsc_has_same_alleles_window(parent2, offspring, i, window_size);
7288 if (p1match && !p2match) {
7289 origins[map.chrs[chr].map.simple.first_marker_index + i] = p1num;
7290 previous = p1num;
7291 } else if (p2match && !p1match) {
7292 origins[map.chrs[chr].map.simple.first_marker_index + i] = p2num;
7293 previous = p2num;
7294 } else {
7295 if (certain) {
7296 origins[map.chrs[chr].map.simple.first_marker_index + i] = 0;
7297 } else {
7298 origins[map.chrs[chr].map.simple.first_marker_index + i] = previous;
7299 }
7300 }
7301 }
7302 for (; i < map.chrs[chr].map.simple.n_markers; ++i) {
7303 origins[map.chrs[chr].map.simple.first_marker_index + i] = 0;
7304 }
7305 break;
7306
7307 case GSC_LINKAGEGROUP_REORDER:
7308 for (i = 0; i < window_range; ++i) {
7309 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7310 }
7311 for (; i < map.chrs[chr].map.reorder.n_markers - window_range; ++i) {
7312 p1match = gsc_has_same_alleles_window(parent1, offspring, i, window_size);
7313 p2match = gsc_has_same_alleles_window(parent2, offspring, i, window_size);
7314 if (p1match && !p2match) {
7315 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = p1num;
7316 previous = p1num;
7317 } else if (p2match && !p1match) {
7318 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = p2num;
7319 previous = p2num;
7320 } else {
7321 if (certain) {
7322 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7323 } else {
7324 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = previous;
7325 }
7326 }
7327 }
7328 for (; i < map.chrs[chr].map.reorder.n_markers; ++i) {
7329 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7330 }
7331 break;
7332 }
7333
7334 }
7335 return origins;
7336}
7337
7374int gsc_calculate_recombinations_from_file(gsc_SimData* d, const char* input_file, const char* output_file,
7375 int window_len, int certain) {
7376 struct gsc_TableSize t = gsc_get_file_dimensions(input_file, '\t');
7377 //open file
7378 FILE* fp;
7379 if ((fp = fopen(input_file, "r")) == NULL) {
7380 fprintf(stderr, "Failed to open file %s.\n", input_file); exit(1);
7381 }
7382 FILE* fpo;
7383 if ((fpo = fopen(output_file, "w")) == NULL) {
7384 fprintf(stderr, "Failed to open file %s.\n", output_file); exit(1);
7385 }
7386
7387 // print header.
7388 for (int j = 0; j < d->genome.n_markers; ++j) {
7389 fprintf(fpo, "\t%s", d->genome.marker_names[j]);
7390 }
7391
7392 int combin_i[3];
7393 char* combin_genes[3];
7394 char buffer[3][50];
7395 int* r;
7396 // for each row in file
7397 for (int i = 0; i < t.num_rows; ++i) {
7398 // load the four grandparents
7399 fscanf(fp, "%s %s %s \n", buffer[0], buffer[1], buffer[2]);
7400 combin_i[0] = gsc_get_index_of_name(d->m, buffer[0]);
7401 combin_i[1] = gsc_get_index_of_name(d->m, buffer[1]);
7402 combin_i[2] = gsc_get_index_of_name(d->m, buffer[2]);
7403 if (combin_i[0] < 0 || combin_i[1] < 0 || combin_i[2] < 0) {
7404 fprintf(stderr, "Genotypes at file %s line %lu could not be found\n", input_file, (long unsigned int) i);
7405 continue;
7406 }
7407 combin_genes[0] = gsc_get_genes_of_index(d->m, combin_i[0]);
7408 combin_genes[1] = gsc_get_genes_of_index(d->m, combin_i[1]);
7409 combin_genes[2] = gsc_get_genes_of_index(d->m, combin_i[2]);
7410
7411 if (window_len == 1) {
7412 r = gsc_calculate_min_recombinations_fw1(d, NO_MAP, combin_genes[1],
7413 gsc_get_id_of_index(d->m, combin_i[1]).id, combin_genes[2],
7414 gsc_get_id_of_index(d->m, combin_i[2]).id, combin_genes[0], certain);
7415 } else {
7416 r = gsc_calculate_min_recombinations_fwn(d, NO_MAP, combin_genes[1],
7417 gsc_get_id_of_index(d->m, combin_i[1]).id, combin_genes[2],
7418 gsc_get_id_of_index(d->m, combin_i[2]).id, combin_genes[0], window_len, certain);
7419 }
7420 fprintf(fpo, "\n%s", buffer[0]);
7421 for (int j = 0; j < d->genome.n_markers; ++j) {
7422 fprintf(fpo, "\t%d", r[j]);
7423 }
7424 GSC_FREE(r);
7425 }
7426
7427 fclose(fp);
7428 fwrite("\n", sizeof(char), 1, fpo);
7429 fflush(fpo);
7430 fclose(fpo);
7431 return 0;
7432}
7433
7434
7435/*--------------------------------Crossing-----------------------------------*/
7436
// Gamete generator; other code in this file calls it as
// gsc_generate_gamete(d, parent_genome, output, map_index). The line holding the
// function name and first parameter is not present in this listing.
//
// For each linkage group of recombination map `map_index` it:
//   1. draws a crossover count with gsc_randpoi from the group's expected_n_crossovers,
//   2. draws that many positions in [0,1] and sorts them ascending,
//   3. walks the group's markers, copying one of the parent's two alleles per marker
//      into `output`, flipping which haplotype is read each time a marker's `dists`
//      value exceeds the next unused crossover position.
// Only output[2*marker] is written; the odd slots are left untouched, which is why
// callers pass `output + 1` to fill the second haplotype of an offspring.
// Returns without writing anything (after a message on stderr) when parent_genome is
// NULL or map_index is not below d->genome.n_maps.
7465 const char* parent_genome,
7466 char* output,
7467 const GSC_ID_T map_index) {
7468 // assumes rand is already seeded
7469 if (parent_genome == NULL) {
7470 fprintf(stderr, "Could not generate this gamete: no parent provided\n");
7471 return;
7472 }
7473 if (map_index >= d->genome.n_maps) {
7474 fprintf(stderr, "Could not generate this gamete: invalid map provided\n");
7475 return;
7476 }
7477 gsc_RecombinationMap map = d->genome.maps[map_index];
7478
7479 // treat each chromosome separately.
// presumably GSC_CREATE_BUFFER also declares `crossover_wherecap`, which the capacity
// check below reads -- macro definition is not visible here, confirm.
7480 GSC_CREATE_BUFFER(crossover_where, double, 100);
7481 for (GSC_GENOLEN_T chr = 0; chr < d->genome.maps[map_index].n_chr; ++chr) {
7482
7483 // Task 1: How many crossovers
7484 int num_crossovers;
7485 switch (map.chrs[chr].type) {
7486 case GSC_LINKAGEGROUP_SIMPLE:
7487 num_crossovers = gsc_randpoi(&d->rng, map.chrs[chr].map.simple.expected_n_crossovers);
7488 break;
7489 case GSC_LINKAGEGROUP_REORDER:
7490 num_crossovers = gsc_randpoi(&d->rng, map.chrs[chr].map.reorder.expected_n_crossovers);
7491 break;
7492 default:
// Unknown group type: report it and fall through with zero crossovers. The second
// switch below also ignores this group, so none of its markers are written.
7493 fprintf(stderr, "Linkage group type of linkage group with index %lu of map with index %lu is corrupted\n",
7494 (long unsigned int) chr, (long unsigned int) map_index);
7495 num_crossovers = 0;
7496 }
7497
7498 // Task 2: Find positions of all crossovers
7499 if (num_crossovers > crossover_wherecap) {
7500 GSC_STRETCH_BUFFER(crossover_where,num_crossovers);
7501 }
// NOTE(review): positions come from the C library rand(), while the crossover count
// and starting haplotype come from d->rng. Seeding d->rng alone therefore may not
// make gametes reproducible -- confirm this is intended.
7502 for (int i = 0; i < num_crossovers; ++i) {
7503 crossover_where[i] = ((double)rand() / (double)RAND_MAX);
7504 }
7505 if (num_crossovers > 1) {
7506 qsort(crossover_where, num_crossovers, sizeof(double), gsc_helper_ascending_double_comparer);
7507 }
7508
7509 // Task 3: Read off the gamete that those crossovers produce.
7510 int which = rnd_pcg_range(&d->rng,0,1); // if this is 0, we start with the left haplotype
7511 int up_to_crossover = 0;
7512 switch (map.chrs[chr].type) {
7513 case GSC_LINKAGEGROUP_SIMPLE:
// Simple groups: markers are contiguous, starting at first_marker_index.
7514 for (GSC_GENOLEN_T i = 0; i < map.chrs[chr].map.simple.n_markers; ++i) {
7515 // is it time to invert which parent haplotype we're reading?
7516 while (up_to_crossover < num_crossovers &&
7517 map.chrs[chr].map.simple.dists[i] > crossover_where[up_to_crossover]) {
7518 which = 1 - which;
7519 up_to_crossover++;
7520 }
7521 output[2*(i + map.chrs[chr].map.simple.first_marker_index)] =
7522 parent_genome[2*(i + map.chrs[chr].map.simple.first_marker_index) + which];
7523 }
7524 break;
7525 case GSC_LINKAGEGROUP_REORDER:
// Reorder groups: the genome position of the i-th marker is looked up in marker_indexes.
7526 for (GSC_GENOLEN_T i = 0; i < map.chrs[chr].map.reorder.n_markers; ++i) {
7527 // is it time to invert which parent haplotype we're reading?
7528 while (up_to_crossover < num_crossovers &&
7529 map.chrs[chr].map.reorder.dists[i] > crossover_where[up_to_crossover]) {
7530 which = 1 - which;
7531 up_to_crossover++;
7532 }
7533 output[2*map.chrs[chr].map.reorder.marker_indexes[i]] =
7534 parent_genome[2*map.chrs[chr].map.reorder.marker_indexes[i] + which];
7535 }
7536 break;
7537 default:
7538 break;
7539 }
7540 }
7541 GSC_DELETE_BUFFER(crossover_where);
7542}
7543
// Doubled-haploid generator (the line holding the function name and first parameter
// is not present in this listing). Same crossover procedure as the gamete generator,
// except that each chosen allele is written to BOTH slots of the marker, so `output`
// receives a complete, fully homozygous genotype.
// Returns without writing anything (after a message on stderr) when parent_genome is
// NULL or map_index is not below d->genome.n_maps.
7564 const char* parent_genome,
7565 char* output,
7566 const GSC_ID_T map_index) {
7567 /* For cache reasons it'll be better to copy-paste gsc_generate_gamete with
7568 * one extra line added to the inner loop, than to generate a single gamete
7569 * and then scan over `output` again to copy it. */
7570
7571 // assumes rand is already seeded
7572 if (parent_genome == NULL) {
7573 fprintf(stderr, "Could not make this doubled haploid\n");
7574 return;
7575 }
7576 if (map_index >= d->genome.n_maps) {
7577 fprintf(stderr, "Could not generate this gamete: invalid map provided\n");
7578 return;
7579 }
7580 gsc_RecombinationMap map = d->genome.maps[map_index];
7581
7582 // treat each chromosome separately.
// presumably GSC_CREATE_BUFFER also declares `crossover_wherecap`, read below -- confirm.
7583 GSC_CREATE_BUFFER(crossover_where, double, 100);
7584 for (GSC_GENOLEN_T chr = 0; chr < d->genome.maps[map_index].n_chr; ++chr) {
7585
7586 // Task 1: How many crossovers
7587 int num_crossovers;
7588 switch (map.chrs[chr].type) {
7589 case GSC_LINKAGEGROUP_SIMPLE:
7590 num_crossovers = gsc_randpoi(&d->rng, map.chrs[chr].map.simple.expected_n_crossovers);
7591 break;
7592 case GSC_LINKAGEGROUP_REORDER:
7593 num_crossovers = gsc_randpoi(&d->rng, map.chrs[chr].map.reorder.expected_n_crossovers);
7594 break;
7595 default:
// Unknown group type: reported, then skipped by the second switch below.
7596 fprintf(stderr, "Linkage group type of group with index %lu of map with index %lu is corrupted\n",
7597 (long unsigned int) chr, (long unsigned int) map_index);
7598 num_crossovers = 0;
7599 }
7600
7601 // Task 2: Find positions of all crossovers
7602 if (num_crossovers > crossover_wherecap) {
7603 GSC_STRETCH_BUFFER(crossover_where,num_crossovers);
7604 }
// NOTE(review): uses the C library rand() rather than d->rng, unlike the other
// random draws in this function -- confirm this is intended.
7605 for (int i = 0; i < num_crossovers; ++i) {
7606 crossover_where[i] = ((double)rand() / (double)RAND_MAX);
7607 }
7608 if (num_crossovers > 1) {
7609 qsort(crossover_where, num_crossovers, sizeof(double), gsc_helper_ascending_double_comparer);
7610 }
7611
7612 // Task 3: Read off the gamete that those crossovers produce.
7613 int which = rnd_pcg_range(&d->rng,0,1); // if this is 0, we start with the left haplotype
7614 int up_to_crossover = 0;
7615 switch (map.chrs[chr].type) {
7616 case GSC_LINKAGEGROUP_SIMPLE:
7617 for (GSC_GENOLEN_T i = 0; i < map.chrs[chr].map.simple.n_markers; ++i) {
7618 // is it time to invert which parent haplotype we're reading?
7619 while (up_to_crossover < num_crossovers &&
7620 map.chrs[chr].map.simple.dists[i] > crossover_where[up_to_crossover]) {
7621 which = 1 - which;
7622 up_to_crossover++;
7623 }
7624 GSC_GENOLEN_T pos = i + map.chrs[chr].map.simple.first_marker_index;
7625 output[2*pos] = parent_genome[2*pos + which];
7626 output[2*pos + 1] = output[2*pos]; // haploid doubling happens here
7627 }
7628 break;
7629 case GSC_LINKAGEGROUP_REORDER:
7630 for (GSC_GENOLEN_T i = 0; i < map.chrs[chr].map.reorder.n_markers; ++i) {
7631 // is it time to invert which parent haplotype we're reading?
7632 while (up_to_crossover < num_crossovers &&
7633 map.chrs[chr].map.reorder.dists[i] > crossover_where[up_to_crossover]) {
7634 which = 1 - which;
7635 up_to_crossover++;
7636 }
7637 GSC_GENOLEN_T pos = map.chrs[chr].map.reorder.marker_indexes[i];
7638 output[2*pos] = parent_genome[2*pos + which];
7639 output[2*pos + 1] = output[2*pos]; // haploid doubling happens here
7640 }
7641 break;
7642 default:
7643 break;
7644 }
7645 }
7646 GSC_DELETE_BUFFER(crossover_where);
7647}
7648
7649
// Clone generator; called elsewhere in this file as
// gsc_generate_clone(d, parent_genome, output). The signature's first line is not
// present in this listing.
// Copies both alleles of every one of the d->genome.n_markers markers from
// parent_genome to output. No recombination map and no randomness are involved.
7662 const char* parent_genome,
7663 char* output) {
7664 for (GSC_GENOLEN_T j = 0; j < d->genome.n_markers; ++j) {
7665 output[2*j] = parent_genome[2*j];
7666 output[2*j + 1] = parent_genome[2*j + 1];
7667 }
7668 return;
7669}
7670
// Pedigree-output setup helper (signature and the enabling `if` line are not present
// in this listing). Builds "<prefix>-pedigree.txt", using g.filename_prefix or "out"
// when the prefix is NULL, opens it for writing and returns the stream. Returns NULL
// when nothing was opened; fopen failure is not reported separately.
7673 FILE* fp = NULL;
7675 char tmpname_p[NAME_LENGTH];
7676 if (g.filename_prefix != NULL) {
// NOTE(review): strncpy does not NUL-terminate when the prefix has NAME_LENGTH-13 or
// more characters, and tmpname_p is otherwise uninitialised, so the strcat below can
// read/write out of bounds. The 13-character suffix plus its terminator also needs
// 14 bytes of headroom, not 13.
7677 strncpy(tmpname_p, g.filename_prefix,
7678 sizeof(char)*(NAME_LENGTH-13));
7679 } else {
7680 strcpy(tmpname_p, "out");
7681 }
7682 strcat(tmpname_p, "-pedigree.txt");
7683 fp = fopen(tmpname_p, "w");
7684 }
7685 return fp;
7686}
// Breeding-value-output setup helper; called elsewhere in this file as
// gsc_helper_genoptions_save_bvs_setup(d, g, &effIndex). Several lines, including the
// one(s) that assign *effIndexp, are not present in this listing.
// When *effIndexp is not GSC_NA_IDX, builds "<prefix>-bv.txt" (prefix defaults to
// "out"), opens it for writing and returns the stream; otherwise returns NULL.
7693 const gsc_GenOptions g,
7694 GSC_ID_T* effIndexp) {
7695 FILE* fe = NULL;
7698 if (*effIndexp != GSC_NA_IDX) {
7699 char tmpname_b[NAME_LENGTH];
7700 if (g.filename_prefix != NULL) {
// NOTE(review): strncpy leaves tmpname_b unterminated when the prefix has
// NAME_LENGTH-7 or more characters; the strcat below then has no terminator to find
// and, with the 7-character suffix plus NUL, no room to append.
7701 strncpy(tmpname_b, g.filename_prefix,
7702 sizeof(char)*(NAME_LENGTH-7));
7703 } else {
7704 strcpy(tmpname_b, "out");
7705 }
7706 strcat(tmpname_b, "-bv.txt");
7707 fe = fopen(tmpname_b, "w");
7708 }
7709 }
7710 return fe;
7711}
// Genotype-output setup helper (signature's first line, the enabling `if` line and
// the header-writing line are not present in this listing). Builds
// "<prefix>-genotype.txt" (prefix defaults to "out"), opens it for writing and
// returns the stream, or NULL when nothing was opened.
7714 const gsc_GenOptions g) {
7715 FILE* fg = NULL;
7717 char tmpname_g[NAME_LENGTH];
7718 if (g.filename_prefix != NULL) {
// NOTE(review): strncpy does not NUL-terminate when the prefix has NAME_LENGTH-13 or
// more characters, so the strcat below can run past the buffer; the suffix plus its
// terminator needs 14 bytes of headroom.
7719 strncpy(tmpname_g, g.filename_prefix,
7720 sizeof(char)*(NAME_LENGTH-13));
7721 } else {
7722 strcpy(tmpname_g, "out");
7723 }
7724 strcat(tmpname_g, "-genotype.txt");
7725 fg = fopen(tmpname_g, "w");
7726 // Save genetic markers as header row.
7728 }
7729 return fg;
7730}
7731
// Pedigree-saving helper; called elsewhere in this file as
// gsc_helper_genoptions_save_pedigrees(fp, d, offspring). Does nothing when `fp` is
// NULL. The statements inside the `if` are not present in this listing.
7738 gsc_SimData* d,
7739 gsc_AlleleMatrix* tosave) {
7740 if (fp) {
7744 }
7745}
// Breeding-value-saving helper; called elsewhere in this file as
// gsc_helper_genoptions_save_bvs(fe, d->e, effIndex, offspring). Writes only when the
// stream is open AND an effect set was selected (effIndex != GSC_NA_IDX); the effect
// set used is effMatrices[effIndex]. The lines creating and destroying the iterator
// `it` are not present in this listing.
7753 gsc_EffectMatrix* effMatrices,
7754 GSC_ID_T effIndex,
7755 gsc_AlleleMatrix* tosave) {
7756 if (fe && effIndex != GSC_NA_IDX) {
7758 gsc_save_utility_bvs(fe, &it, effMatrices + effIndex);
7760 }
7761}
// Body fragment of the genotype-saving helper (signature and iterator set-up/tear-down
// lines are not present in this listing). When the stream `fg` is open, hands the
// genotypes reachable through `it` to gsc_save_utility_genotypes, passing
// tosave->n_markers, a NULL fourth argument and GSC_FALSE as the last flag.
7768 if (fg) {
7770 gsc_save_utility_genotypes(fg, &it, tosave->n_markers, NULL, GSC_FALSE);
7772 }
7773}
7774
// Naming/ID helper for a freshly generated allele matrix `am` (signature's first line
// and the naming statement are not present in this listing).
// When g.will_allocate_ids is set, each of the am->n_genotypes genotypes receives the
// next value of the simulation-wide counter d->current_id, which is incremented
// before every assignment.
7779 gsc_SimData* d,
7780 const gsc_GenOptions g) {
7781 if (g.will_name_offspring) {
7783 }
7784 if (g.will_allocate_ids) {
7785 for (GSC_LOCALX_T j = 0; j < am->n_genotypes; ++j) {
7786 ++(d->current_id.id);
7787 am->ids[j] = d->current_id;
7788 }
7789 }
7790}
7791
7792
// Fragment of a parameter-bundle type made of several anonymous structs; presumably
// `union gsc_datastore_make_genotypes`, which the functions below use through members
// such as .rand, .rand_btwn, .targeted, .selfing, .doub_haps and .clones -- confirm.
// Most member lines and the struct names are not present in this listing.
7794 struct {
7801 struct {
7812 struct {
7820 struct {
7822 unsigned int n_gens_selfing;
7824 struct {
7825 GSC_ID_T map_index; // needs to be in first spot to match selfing.map_index
7827 struct {
7831};
7832
7833
// Shared driver for every "make offspring" operation in this file (called as
// gsc_scaffold_make_new_genotypes(d, g, parentIterator, datastore, chooser, generator)).
// Several lines are not present in this listing: the signature's first line, the
// allocation of the `offspring` buffer and `parents` array, the pedigree/genotype
// file set-up, and the naming and genotype-saving calls.
//
// Flow, as far as visible here:
//   - parentChooser is called repeatedly; each time it returns nonzero it has filled
//     `parents`, and g.family_size offspring are produced via offspringGenerator.
//   - Offspring accumulate in the `offspring` matrix. Whenever it holds CONTIG_WIDTH
//     genotypes it is flushed to the open output files and, if g.will_save_to_simdata,
//     appended to the end of the d->m linked list.
//   - After the loop the partially filled matrix is flushed the same way.
// Returns the new group's number when at least one parent choice succeeded and
// results are saved to the simulation; otherwise frees the last matrix and returns
// GSC_NO_GROUP. Also returns GSC_NO_GROUP immediately for family_size < 1 or a NULL
// d / callback.
7869 const gsc_GenOptions g,
7870 void* parentIterator,
7871 union gsc_datastore_make_genotypes* datastore,
7872 int (*parentChooser)(void*,
7874 GSC_GLOBALX_T*,
7875 gsc_ParentChoice[static 2]),
7876 void (*offspringGenerator)(gsc_SimData*,
7878 gsc_ParentChoice[static 2],
7880 ) {
7881 if (g.family_size < 1 || d == NULL ||
7882 parentChooser == NULL || offspringGenerator == NULL) {
7883 return GSC_NO_GROUP;
7884 }
7885
7886 // create the buffer we'll use to save the output crosses before they're printed.
// fullness: number of offspring currently in the buffer.
// counter: shared with parentChooser; incremented once per successful parent choice
// (choosers may also advance it themselves when skipping entries).
7888 GSC_LOCALX_T fullness = 0;
7889 GSC_GLOBALX_T counter = 0;
7892
7893 gsc_AlleleMatrix* last = NULL;
7894 gsc_GroupNum output_group = GSC_NO_GROUP;
7895 if (g.will_save_to_simdata) {
// NOTE(review): d->m is dereferenced here without a NULL check -- confirm callers
// guarantee the simulation already holds at least one allele matrix.
7896 last = d->m; // for saving to simdata
7897 while (last->next != NULL) {
7898 last = last->next;
7899 }
7900 output_group = gsc_get_new_group_num( d );
7901 }
7902
7903 // open the output files, if applicable
7905 GSC_ID_T effIndex = GSC_NA_IDX;
7906 FILE* fe = gsc_helper_genoptions_save_bvs_setup(d,g,&effIndex);
7908
7909 //RPACKINSERT GetRNGstate();
7910 // loop through each combination
7911 while (parentChooser(parentIterator, datastore, &counter, parents)) {
7912 ++counter;
7913 for (GSC_GLOBALX_T f = 0; f < g.family_size; ++f, ++fullness) {
7914 //RPACKINSERT R_CheckUserInterrupt();
7915
7916 // when offspring buffer is full, save these outcomes to the file.
7917 if (fullness >= CONTIG_WIDTH) {
7918 offspring->n_genotypes = CONTIG_WIDTH;
7920 gsc_helper_genoptions_save_pedigrees(fp, d, offspring);
7921 gsc_helper_genoptions_save_bvs(fe, d->e, effIndex, offspring);
7923
7924 if (g.will_save_to_simdata) {
7925 last->next = offspring;
7926 last = last->next;
7928 }
7929 fullness = 0; //reset the count and start refilling the matrix
7930 }
7931
7932 // do the cross.
7933 gsc_GenoLocation offspringPos = { .localAM=offspring, .localPos=fullness };
7934 offspringGenerator(d, datastore, parents, offspringPos);
7935 offspring->groups[fullness] = output_group;
7936 if (g.will_track_pedigree) {
7937 offspring->pedigrees[0][fullness] = gsc_get_id(parents[0].loc);
7938 offspring->pedigrees[1][fullness] = gsc_get_id(parents[1].loc);
7939 }
7940 }
7941 }
7942 //RPACKINSERT PutRNGstate();
7943
// Flush whatever is left in the (possibly partially filled) buffer.
7944 offspring->n_genotypes = fullness;
7946 gsc_helper_genoptions_save_pedigrees(fp, d, offspring);
7947 gsc_helper_genoptions_save_bvs(fe, d->e, effIndex, offspring);
7949
7950 if (fp) fclose(fp);
7951 if (fe) fclose(fe);
7952 if (fg) fclose(fg);
7953
7954 if (counter > 0 && g.will_save_to_simdata) {
7955 last->next = offspring;
7956 d->n_groups++;
7958 return output_group;
7959 } else {
7960 gsc_delete_allele_matrix( offspring );
7961 return GSC_NO_GROUP;
7962 }
7963}
7964
// parentChooser callback for random crossing within one group.
// `parentIterator` is a gsc_RandomAccessIterator over the group; `datastore->rand`
// supplies n_crosses, group_size, cap, uses[] and map_index.
// While *counter is below n_crosses (and, when cap > 0, below cap * group_size) it
// draws two distinct member indexes, records their use when a cap applies, and fills
// parents[0] and parents[1] with the corresponding locations and the map index.
// Returns nonzero only if both chosen locations are valid; GSC_FALSE once the
// requested number of crosses has been reached.
// (One argument line of the first draw is not present in this listing.)
7974static int gsc_helper_parentchooser_cross_randomly(void* parentIterator,
7975 union gsc_datastore_make_genotypes* datastore,
7976 GSC_GLOBALX_T* counter,
7977 gsc_ParentChoice parents[static 2]) {
7978 gsc_RandomAccessIterator* it = (gsc_RandomAccessIterator*) parentIterator;
7979
7980 GSC_GLOBALX_T parentixs[2] = { 0 };
7981
7982 if (*counter < datastore->rand.n_crosses &&
7983 (datastore->rand.cap == 0 || (*counter) < datastore->rand.cap * datastore->rand.group_size)) {
7984 // get parents, randomly. Must not be identical or already been used too many times.
7985 parentixs[0] = gsc_randomdraw_replacementrules(it[0].d,
7986 datastore->rand.group_size,
7987 datastore->rand.cap,
7988 datastore->rand.uses,
// The second draw passes the first index as the value to avoid, so the two parents
// are never the same group member.
7990 parentixs[1] = gsc_randomdraw_replacementrules(it[0].d,
7991 datastore->rand.group_size,
7992 datastore->rand.cap,
7993 datastore->rand.uses,
7994 parentixs[0]);
7995
7996 if (datastore->rand.cap > 0) {
7997 datastore->rand.uses[parentixs[0]] += 1;
7998 datastore->rand.uses[parentixs[1]] += 1;
7999 }
8000
8001 // Neither of these should fail, if nparents is good.
8002 parents[0].loc = gsc_next_get_nth(parentIterator, parentixs[0]);
8003 parents[1].loc = gsc_next_get_nth(parentIterator, parentixs[1]);
8004 // Reiterate map. Might save us a read to not bother checking their values first.
8005 parents[0].mapindex = datastore->rand.map_index;
8006 parents[1].mapindex = datastore->rand.map_index;
8007 // This will cut short gsc_scaffold_make_new_genotypes execution if either parent is invalid.
8008 return GSC_IS_VALID_LOCATION(parents[0].loc) && GSC_IS_VALID_LOCATION(parents[1].loc);
8009 } else {
8010 return GSC_FALSE;
8011 }
8012}
8013
// offspringGenerator callback for a two-parent cross (signature's first line is not
// present in this listing). Generates one gamete from each parent, each with that
// parent's own recombination map index: the gamete from parents[0] fills the even
// slots of the offspring's allele string, and the gamete from parents[1] fills the
// odd slots (hence the `+1` offset). `datastore` is not used here.
8027 union gsc_datastore_make_genotypes* datastore,
8028 gsc_ParentChoice parents[static 2],
8029 gsc_GenoLocation putHere) {
8030 // (silly name)
8031 gsc_generate_gamete(d, gsc_get_alleles(parents[0].loc), (gsc_get_alleles(putHere) ), parents[0].mapindex);
8032 gsc_generate_gamete(d, gsc_get_alleles(parents[1].loc), (gsc_get_alleles(putHere)+1), parents[1].mapindex);
8033}
8034
// Parameter validation shared by the random-crossing entry points; called elsewhere
// in this file as gsc_helper_random_cross_checks(d, from_group, n_crosses, cap). The
// signature's first line is not present in this listing.
// Returns the size of `from_group` when all checks pass, or 0 (after a message on
// stderr) when the group is empty/nonexistent, n_crosses < 1, or a nonzero cap is too
// small for cap * group size to reach n_crosses.
8041 const gsc_GroupNum from_group,
8042 const GSC_GLOBALX_T n_crosses,
8043 const GSC_GLOBALX_T cap) {
8044 GSC_GLOBALX_T g_size = gsc_get_group_size(d, from_group); // might be a better way to do this using the iterator.
8045 if (g_size == 0) {
8046 fprintf(stderr,"Group %lu does not exist\n", (long unsigned int) from_group.num);
8047 return 0;
8048 }
8049
8050 if (n_crosses < 1) {
8051 fprintf(stderr,"Invalid n_crosses value provided: n_crosses must be greater than 0\n");
8052 return 0;
8053 }
8054
// NOTE(review): if GSC_GLOBALX_T is an unsigned type this comparison can never be
// true -- its definition is not visible here, confirm.
8055 if (cap < 0) {
8056 fprintf(stderr,"Invalid cap value provided: cap can't be negative\n");
8057 return 0;
8058 }
8059 if (cap > 0 && cap*g_size < n_crosses) {
8060 fprintf(stderr,"Invalid cap value provided: cap of %lu uses on %lu parents too small to make %lu crosses\n",
8061 (long unsigned int) cap, (long unsigned int) g_size, (long unsigned int) n_crosses);
8062 return 0;
8063 }
8064
8065 return g_size;
8066}
8067
// Random crossing within a single group (signature's first line, the two callback
// arguments of the scaffold call, and one line after it are not present in this
// listing).
// Validates the request, packs the parameters into the `.rand` member of the
// datastore, resolves `which_map` to a map index (index 0 when which_map is NO_MAP),
// and delegates to gsc_scaffold_make_new_genotypes. When cap > 0 a zeroed per-member
// use counter array is allocated for the duration of the call.
// Returns the offspring group, or GSC_NO_GROUP when validation fails, the group has a
// single member, no map is loaded, or the requested map cannot be found.
8094 const gsc_GroupNum from_group,
8095 const GSC_GLOBALX_T n_crosses,
8096 const GSC_GLOBALX_T cap,
8097 const gsc_MapID which_map,
8098 const gsc_GenOptions g) {
// n_crosses*2 is passed to the cap check, presumably because every cross consumes
// two parent uses from the same group.
8099 GSC_GLOBALX_T g_size = gsc_helper_random_cross_checks(d, from_group, n_crosses*2, cap);
8100 if (g_size == 0) {
8101 return GSC_NO_GROUP;
8102 } else if (g_size == 1) {
8103 fprintf(stderr,"Group %lu must contain multiple individuals to be able to perform random crossing\n",
8104 (long unsigned int) from_group.num);
8105 return GSC_NO_GROUP;
8106 }
8107 if (d->genome.n_maps < 1) {
8108 fprintf(stderr,"Crossing requires at least one recombination map loaded\n");
8109 return GSC_NO_GROUP;
8110 }
8111
8112 union gsc_datastore_make_genotypes paramstore = { 0 };
8113 paramstore.rand.n_crosses = n_crosses;
8114 paramstore.rand.group_size = g_size;
8115 paramstore.rand.map_index = 0;
8116 paramstore.rand.cap = cap;
8117 if (cap > 0) {
8118 paramstore.rand.uses = gsc_malloc_wrap(sizeof(*paramstore.rand.uses)*g_size,GSC_TRUE);
8119 memset(paramstore.rand.uses, 0, sizeof(*paramstore.rand.uses)*g_size);
8120 } else {
8121 paramstore.rand.uses = NULL;
8122 }
8123
8124 if (which_map.id != NO_MAP.id) {
8125 paramstore.rand.map_index = gsc_get_index_of_map(d, which_map);
8126 }
// NOTE(review): when cap > 0, this early return leaks paramstore.rand.uses, which was
// allocated above and is only freed at the end of the function.
8127 if (paramstore.rand.map_index == GSC_NA_IDX) {
8128 fprintf(stderr,"Could not find recombination map with identifier %lu\n", (long unsigned int) which_map.id);
8129 return GSC_NO_GROUP;
8130 }
8131
// NOTE(review): this spells the type `RandomAccessIterator`, without the gsc_ prefix
// used everywhere else in this chunk; presumably a short-name alias exists -- confirm.
8132 RandomAccessIterator parentit = gsc_create_randomaccess_iter( d, from_group);
8133
8134 gsc_GroupNum offspring = gsc_scaffold_make_new_genotypes(d, g, (void*) &parentit,
8135 &paramstore,
8138
8140 GSC_FREE(paramstore.rand.uses);
8141 return offspring;
8142}
8143
// Draws a random index in [0, max-1]; called elsewhere in this file as
// gsc_randomdraw_replacementrules(d, max, cap, member_uses, noCollision). The
// signature's first line and the `cap` parameter line are not present in this listing.
// The draw is repeated until the index differs from `noCollision` and, when cap > 0,
// until member_uses[index] is still below cap.
// Returns GSC_NA_GLOBALX when max < 1, or when max == 1 and the only index (0) is the
// one to avoid.
8167 GSC_GLOBALX_T max,
8169 GSC_GLOBALX_T* member_uses,
8170 GSC_GLOBALX_T noCollision) {
8171 if (max < 1 || (max == 1 && noCollision == 0)) {
8172 return GSC_NA_GLOBALX;
8173 }
// Only warns; execution continues with the oversized max.
// NOTE(review): the adjacent string literals have no separating spaces, so the
// message reads "...C versionwith..." and "...may be lostor..."; it also lacks a
// trailing newline.
8174 if (max > INT_MAX) {
8175 fprintf(stderr, "Drawing a random number with a max of %lu is not supported on the C version"
8176 "with the rnd library. If the max is greater than %d, probabilistic uniformity may be lost"
8177 "or an infinite loop may occur.", (long unsigned int) max, INT_MAX);
8178 }
8179
8180 GSC_GLOBALX_T parentix = 0;
8181 if (cap > 0) { // n uses of each parent is capped at a number cap.
// NOTE(review): if every index other than noCollision has already reached cap, this
// loop never terminates; the callers' cap * group_size checks appear intended to
// prevent that -- confirm.
8182 do {
8183 parentix = rnd_pcg_range(&d->rng,0,max - 1);
8184 } while (parentix == noCollision || member_uses[parentix] >= cap);
8185 } else { // no cap on usage of each parent.
8186 do {
8187 parentix = rnd_pcg_range(&d->rng,0,max - 1);
8188 } while (parentix == noCollision);
8189 }
8190 return parentix;
8191}
8192
// parentChooser callback for random crossing between two groups (signature's first
// line and the last argument line of each draw are not present in this listing).
// `parentIterator` is treated as an array of two gsc_RandomAccessIterator: it[0]
// ranges over group 1 and it[1] over group 2. Parameters come from
// `datastore->rand_btwn`.
// While *counter is below n_crosses and below each group's nonzero cap * size, draws
// one index per group, records the use when that group has a cap, and fills
// parents[0]/parents[1] with the locations and the per-parent map indexes.
// Returns nonzero only when both locations are valid, otherwise GSC_FALSE.
8205 union gsc_datastore_make_genotypes* datastore,
8206 GSC_GLOBALX_T* counter,
8207 gsc_ParentChoice parents[static 2]) {
8208 // caller function should guarantee that nparents is not 1. How would you make a nonselfed cross then?
8209 gsc_RandomAccessIterator* it = (gsc_RandomAccessIterator*) parentIterator;
8210 size_t parentixs[2] = { 0 };
8211
8212 if (*counter < datastore->rand_btwn.n_crosses &&
8213 (datastore->rand_btwn.cap1 == 0 || (*counter) < datastore->rand_btwn.cap1 * datastore->rand_btwn.group1_size) &&
8214 (datastore->rand_btwn.cap2 == 0 || (*counter) < datastore->rand_btwn.cap2 * datastore->rand_btwn.group2_size)) {
8215 // get parents, randomly. Must not be identical or already been used too many times.
8216 parentixs[0] = gsc_randomdraw_replacementrules(it[0].d,
8217 datastore->rand_btwn.group1_size,
8218 datastore->rand_btwn.cap1,
8219 datastore->rand_btwn.uses1,
8221 parentixs[1] = gsc_randomdraw_replacementrules(it[1].d,
8222 datastore->rand_btwn.group2_size,
8223 datastore->rand_btwn.cap2,
8224 datastore->rand_btwn.uses2,
8226
8227 if (datastore->rand_btwn.cap1 > 0) {
8228 datastore->rand_btwn.uses1[parentixs[0]] += 1;
8229 }
8230 if (datastore->rand_btwn.cap2 > 0) {
8231 datastore->rand_btwn.uses2[parentixs[1]] += 1;
8232 }
8233
8234 parents[0].loc = gsc_next_get_nth(it+0, parentixs[0]);
8235 parents[1].loc = gsc_next_get_nth(it+1, parentixs[1]);
8236 parents[0].mapindex = datastore->rand_btwn.map1_index;
8237 parents[1].mapindex = datastore->rand_btwn.map2_index;
8238 return GSC_IS_VALID_LOCATION(parents[0].loc) && GSC_IS_VALID_LOCATION(parents[1].loc);
8239 }
8240 return GSC_FALSE;
8241}
8242
// Random crossing between two groups (the signature's first line, the n_crosses
// parameter line, the lines computing group1_size/group2_size, and the callback
// arguments of the scaffold call are not present in this listing).
// Packs the parameters into the `.rand_btwn` member of the datastore, allocates a
// zeroed use-counter array for each group that has a nonzero cap, resolves map1/map2
// to map indexes (index 0 for NO_MAP), builds one random-access iterator per group
// and delegates to gsc_scaffold_make_new_genotypes.
// Returns the offspring group, or GSC_NO_GROUP when a group size is 0, no map is
// loaded, or a requested map cannot be found.
8280 const gsc_GroupNum group1,
8281 const gsc_GroupNum group2,
8283 const GSC_GLOBALX_T cap1,
8284 const GSC_GLOBALX_T cap2,
8285 const gsc_MapID map1,
8286 const gsc_MapID map2,
8287 const gsc_GenOptions g) {
8290 if (group1_size == 0 || group2_size == 0) {
8291 return GSC_NO_GROUP;
8292 }
8293 if (d->genome.n_maps < 1) {
8294 fprintf(stderr,"Crossing requires at least one recombination map loaded\n");
8295 return GSC_NO_GROUP;
8296 }
8297
// Not zero-initialised (unlike the single-group variant); every .rand_btwn field
// read by the chooser is assigned explicitly below.
8298 union gsc_datastore_make_genotypes paramstore;
8299 paramstore.rand_btwn.n_crosses = n_crosses;
8300 paramstore.rand_btwn.group1_size = group1_size;
8301 paramstore.rand_btwn.group2_size = group2_size;
8302 paramstore.rand_btwn.map1_index = 0;
8303 paramstore.rand_btwn.map2_index = 0;
8304 paramstore.rand_btwn.cap1 = cap1;
8305 paramstore.rand_btwn.cap2 = cap2;
8306 if (cap1 > 0) {
8307 paramstore.rand_btwn.uses1 =
8308 gsc_malloc_wrap(sizeof(*paramstore.rand_btwn.uses1)*group1_size,GSC_TRUE);
8309 memset(paramstore.rand_btwn.uses1, 0, sizeof(*paramstore.rand_btwn.uses1)*group1_size);
8310 } else {
8311 paramstore.rand_btwn.uses1 = NULL;
8312 }
8313 if (cap2 > 0) {
8314 paramstore.rand_btwn.uses2 =
8315 gsc_malloc_wrap(sizeof(*paramstore.rand_btwn.uses2)*group2_size,GSC_TRUE);
8316 memset(paramstore.rand_btwn.uses2, 0, sizeof(*paramstore.rand_btwn.uses2)*group2_size);
8317 } else {
8318 paramstore.rand_btwn.uses2 = NULL;
8319 }
8320
// NOTE(review): both "map not found" returns below leak uses1/uses2 when the
// corresponding cap is nonzero; they are only freed at the end of the function.
8321 if (map1.id != NO_MAP.id) {
8322 paramstore.rand_btwn.map1_index = gsc_get_index_of_map(d, map1);
8323 }
8324 if (paramstore.rand_btwn.map1_index == GSC_NA_IDX) {
8325 fprintf(stderr,"Could not find recombination map with identifier %lu\n", (long unsigned int) map1.id);
8326 return GSC_NO_GROUP;
8327 }
8328 if (map2.id != NO_MAP.id) {
8329 paramstore.rand_btwn.map2_index = gsc_get_index_of_map(d, map2);
8330 }
8331 if (paramstore.rand_btwn.map2_index == GSC_NA_IDX) {
8332 fprintf(stderr,"Could not find recombination map with identifier %lu\n", (long unsigned int) map2.id);
8333 return GSC_NO_GROUP;
8334 }
8335
// The chooser indexes this array as it[0] (group1) and it[1] (group2).
8336 gsc_RandomAccessIterator parentit[2] = { gsc_create_randomaccess_iter( d, group1 ),
8337 gsc_create_randomaccess_iter( d, group2 ) };
8338
8339 gsc_GroupNum offspring = gsc_scaffold_make_new_genotypes(d, g, (void*) parentit,
8340 &paramstore,
8343
8344 gsc_delete_randomaccess_iter(&parentit[0]);
8345 gsc_delete_randomaccess_iter(&parentit[1]);
8346 GSC_FREE(paramstore.rand_btwn.uses1);
8347 GSC_FREE(paramstore.rand_btwn.uses2);
8348 return offspring;
8349
8350}
8351
8361static int gsc_helper_parentchooser_cross_targeted(void* parentIterator,
8362 union gsc_datastore_make_genotypes* datastore,
8363 GSC_GLOBALX_T* counter,
8364 gsc_ParentChoice parents[static 2]) {
8365 gsc_RandomAccessIterator* it = (gsc_RandomAccessIterator*) parentIterator;
8366
8367 while (*counter < datastore->targeted.n_crosses) {
8368 if (datastore->targeted.first_parents[*counter] != GSC_NA_GLOBALX &&
8369 datastore->targeted.second_parents[*counter] != GSC_NA_GLOBALX) {
8370 // We only try to "get nth" if it seems like a potentially reasonable value
8371 parents[0].loc = gsc_next_get_nth(it, datastore->targeted.first_parents[*counter]);
8372 parents[1].loc = gsc_next_get_nth(it, datastore->targeted.second_parents[*counter]);
8373 parents[0].mapindex = datastore->targeted.map1_index;
8374 parents[1].mapindex = datastore->targeted.map2_index;
8375
8376 if (GSC_IS_VALID_LOCATION(parents[0].loc) && GSC_IS_VALID_LOCATION(parents[1].loc)) {
8377 return GSC_TRUE;
8378 }
8379 }
8380
8381 // If this was not a valid pair of parents, skip them and move on to the next pair.
8382 ++ datastore->targeted.bad_pairings;
8383 ++ (*counter);
8384 }
8385 return GSC_FALSE;
8386}
8387
// Targeted crossing; called elsewhere in this file as
// gsc_make_targeted_crosses(d, n_combinations, firstParents, secondParents, map1,
// map2, g). The signature's first line, the iterator creation/deletion lines and the
// callback arguments of the scaffold call are not present in this listing.
// Pairing i crosses firstParents[i] with secondParents[i]; the chooser callback
// resolves these values through the parent iterator and skips pairings it cannot
// resolve, counting them in `bad_pairings`.
// Returns the offspring group, or GSC_NO_GROUP when n_combinations < 1, no map is
// loaded, a requested map is not found, or every pairing was invalid.
8421 const size_t n_combinations,
8422 const GSC_GLOBALX_T* firstParents,
8423 const GSC_GLOBALX_T* secondParents,
8424 const gsc_MapID map1,
8425 const gsc_MapID map2,
8426 const gsc_GenOptions g) {
8427 if (n_combinations < 1) {
8428 fprintf(stderr,"Invalid n_combinations value provided: n_combinations must be greater than 0\n");
8429 return GSC_NO_GROUP;
8430 }
8431 if (d->genome.n_maps < 1) {
8432 fprintf(stderr,"Crossing requires at least one recombination map loaded\n");
8433 return GSC_NO_GROUP;
8434 }
8435
8436 union gsc_datastore_make_genotypes paramstore;
8437 paramstore.targeted.n_crosses = n_combinations;
8438 paramstore.targeted.bad_pairings = 0;
8439 paramstore.targeted.map1_index = 0;
8440 paramstore.targeted.map2_index = 0;
8441 // casting away const but is being used as readonly
8442 paramstore.targeted.first_parents = (GSC_GLOBALX_T*) firstParents;
8443 paramstore.targeted.second_parents = (GSC_GLOBALX_T*) secondParents;
8444
// NO_MAP keeps the default index 0 (the first loaded map) for that parent.
8445 if (map1.id != NO_MAP.id) {
8446 paramstore.targeted.map1_index = gsc_get_index_of_map(d, map1);
8447 }
8448 if (paramstore.targeted.map1_index == GSC_NA_IDX) {
8449 fprintf(stderr,"Could not find recombination map with identifier %lu\n", (long unsigned int) map1.id);
8450 return GSC_NO_GROUP;
8451 }
8452 if (map2.id != NO_MAP.id) {
8453 paramstore.targeted.map2_index = gsc_get_index_of_map(d, map2);
8454 }
8455 if (paramstore.targeted.map2_index == GSC_NA_IDX) {
8456 fprintf(stderr,"Could not find recombination map with identifier %lu\n", (long unsigned int) map2.id);
8457 return GSC_NO_GROUP;
8458 }
8459
8461
8462 gsc_GroupNum offspring = gsc_scaffold_make_new_genotypes(d, g, (void*) &parentit,
8463 &paramstore,
8466
// bad_pairings was incremented by the chooser for each pairing it had to skip.
8468 if (paramstore.targeted.bad_pairings > 0) {
8469 fprintf(stderr,"Targeted crossing failed for %lu out of the %lu requested pairings due to one or both genotype indexes being invalid\n", (long unsigned int) paramstore.targeted.bad_pairings, (long unsigned int) n_combinations);
8470 }
8471 if (n_combinations - paramstore.targeted.bad_pairings == 0) {
8472 return GSC_NO_GROUP;
8473 }
8474 return offspring;
8475}
8476
// parentChooser callback for selfing: each call advances the iterator by one
// genotype and uses it as both parents, with the map index from
// `datastore->selfing`. Returns nonzero while the iterator yields a valid location.
// `counter` is not used in the visible lines. (The line declaring `it` from
// parentIterator is not present in this listing.)
8486static int gsc_helper_parentchooser_selfing(void* parentIterator,
8487 union gsc_datastore_make_genotypes* datastore,
8488 GSC_GLOBALX_T* counter,
8489 gsc_ParentChoice parents[static 2]) {
8491
8492 parents[0].loc = gsc_next_forwards(it);
8493 parents[0].mapindex = datastore->selfing.map_index;
8494 parents[1] = parents[0];
8495
8496 return GSC_IS_VALID_LOCATION(parents[0].loc);
8497}
8498
// offspringGenerator callback for n generations of selfing (signature's first line
// is not present in this listing).
// Each generation draws two gametes from the current parent genome to form the next
// genome. Generations alternate between a scratch buffer and the final destination,
// so a genome is never overwritten while it is being read. The parity test against
// n % 2 arranges for the last generation (i == n-1) to be written into the
// offspring's own allele storage.
8511 union gsc_datastore_make_genotypes* datastore,
8512 gsc_ParentChoice parents[static 2],
8513 gsc_GenoLocation putHere) {
8514 unsigned int n = datastore->selfing.n_gens_selfing;
8515
8516 // error checking parents are the same is not done.
8517 // error checking n >= 1 is not done.
8518
8519 char* tmpparent = gsc_get_alleles(parents[0].loc);
8520 GSC_ID_T map = parents[0].mapindex;
// Scratch genome: two alleles per marker.
8521 GSC_CREATE_BUFFER(tmpchild,char,d->genome.n_markers<<1);
8522 char* output = gsc_get_alleles(putHere);
8523 int n_oddness = n % 2;
8524 for (unsigned int i = 0; i < n; ++i) {
8525 if (i % 2 == n_oddness) {
// Intermediate generation -> scratch buffer (even slots, then odd slots via +1).
8526 gsc_generate_gamete(d, tmpparent, tmpchild, map);
8527 gsc_generate_gamete(d, tmpparent, tmpchild+1, map);
8528 tmpparent = tmpchild;
8529 } else {
8530 gsc_generate_gamete(d, tmpparent, output, map);
8531 gsc_generate_gamete(d, tmpparent, output+1, map);
8532 tmpparent = output;
8533 }
8534 }
8535 GSC_DELETE_BUFFER(tmpchild);
8536}
8537
// Selfing entry point (signature's first line, the iterator creation line and the
// callback arguments of the scaffold call are not present in this listing).
// Produces offspring of `group` by n generations of selfing using `which_map`, or
// map index 0 when which_map is NO_MAP.
// Returns whatever gsc_scaffold_make_new_genotypes returns, or GSC_NO_GROUP when
// n < 1, no map is loaded, or the requested map cannot be found.
8562 const unsigned int n,
8563 const gsc_GroupNum group,
8564 const gsc_MapID which_map,
8565 const gsc_GenOptions g) {
8566 /*int group_size = gsc_get_group_size( d, group);
8567 if (group_size < 1) {
8568 fprintf(stderr,"Group %d does not exist.\n", group.num);
8569 return GSC_NO_GROUP;
8570 }*/
8571 if (n < 1) {
8572 fprintf(stderr,"Invalid n value provided: Number of generations must be greater than 0\n");
8573 return GSC_NO_GROUP;
8574 }
8575 if (d->genome.n_maps == 0) {
8576 fprintf(stderr,"Selfing requires at least one recombination map loaded\n");
8577 return GSC_NO_GROUP;
8578 }
8579
8580 union gsc_datastore_make_genotypes paramstore;
8581 paramstore.selfing.map_index = 0;
8582 paramstore.selfing.n_gens_selfing = n;
8583
8584 if (which_map.id != NO_MAP.id) {
8585 paramstore.selfing.map_index = gsc_get_index_of_map(d, which_map);
8586 }
8587 if (paramstore.selfing.map_index == GSC_NA_IDX) {
8588 fprintf(stderr,"Could not find recombination map with identifier %lu\n", (long unsigned int) which_map.id);
8589 return GSC_NO_GROUP;
8590 }
8591
8593
// NOTE(review): the result is returned directly, so any clean-up of `parentit` would
// have to happen elsewhere -- its creation line is not visible here, confirm.
8594 return gsc_scaffold_make_new_genotypes(d, g, (void*) &parentit,
8595 &paramstore,
8598}
8599
// offspringGenerator callback for doubled haploids (signature's first line and the
// line naming the called function are not present in this listing). Passes
// parents[0]'s alleles, the offspring's allele storage and parents[0]'s map index to
// the generator; parents[1] and `datastore` are not used in the visible lines.
8609 union gsc_datastore_make_genotypes* datastore,
8610 gsc_ParentChoice parents[static 2],
8611 gsc_GenoLocation putHere) {
8613 gsc_get_alleles(parents[0].loc),
8614 gsc_get_alleles(putHere),
8615 parents[0].mapindex);
8616}
8617
// Doubled-haploid entry point (signature's first line, the iterator creation line
// and the callback arguments of the scaffold call are not present in this listing).
// Resolves `which_map` to a map index and delegates to
// gsc_scaffold_make_new_genotypes.
// Returns GSC_NO_GROUP when no map is loaded or the requested map cannot be found.
8638 const gsc_GroupNum group,
8639 const gsc_MapID which_map,
8640 const gsc_GenOptions g) {
8641 /*int group_size = gsc_get_group_size( d, group);
8642 if (group_size < 1) {
8643 fprintf(stderr,"Group %d does not exist.\n", group.num);
8644 return GSC_NO_GROUP;
8645 }*/
8646 if (d->genome.n_maps == 0) {
8647 fprintf(stderr,"Crossing requires at least one recombination map loaded\n");
8648 return GSC_NO_GROUP;
8649 }
8650
// Zero-initialising the datastore is what makes map index 0 the default when
// which_map is NO_MAP: doub_haps.map_index is never assigned in that case.
8651 union gsc_datastore_make_genotypes paramstore = { 0 };
8652
8653 if (which_map.id != NO_MAP.id) {
8654 paramstore.doub_haps.map_index = gsc_get_index_of_map(d, which_map);
8655 }
8656 if (paramstore.doub_haps.map_index == GSC_NA_IDX) {
8657 fprintf(stderr,"Could not find recombination map with identifier %lu\n", (long unsigned int) which_map.id);
8658 return GSC_NO_GROUP;
8659 }
8660
8662
8663 return gsc_scaffold_make_new_genotypes(d, g, (void*) &parentit,
8664 &paramstore,
8667}
8668
// parentChooser callback for cloning: each call advances the iterator by one
// genotype and uses it as both parents. When `datastore->clones.inherit_names` is
// set, the parent's name pointer is stashed in `datastore->clones.parent_name` for
// the offspring generator to copy. Returns GSC_TRUE while the iterator yields a valid
// location, otherwise GSC_FALSE. No map index is assigned to `parents` here.
// (The line declaring `it` from parentIterator is not present in this listing.)
8680static int gsc_helper_parentchooser_cloning(void* parentIterator,
8681 union gsc_datastore_make_genotypes* datastore,
8682 GSC_GLOBALX_T* counter,
8683 gsc_ParentChoice parents[static 2]) {
8685
8686 parents[0].loc = gsc_next_forwards(it);
8687 parents[1] = parents[0];
8688
8689 if (GSC_IS_VALID_LOCATION(parents[0].loc)) {
8690 if (datastore->clones.inherit_names) {
8691 datastore->clones.parent_name = gsc_get_name(parents[0].loc);
8692 }
8693 return GSC_TRUE;
8694 } else {
8695 return GSC_FALSE;
8696 }
8697}
8698
// offspringGenerator callback for cloning (signature's first line is not present in
// this listing). Copies the parent's alleles into the offspring slot. When name
// inheritance is on and the parent has a name, a freshly allocated copy of that name
// is handed to gsc_set_name for the offspring; presumably the offspring record then
// owns that allocation -- confirm.
8708 union gsc_datastore_make_genotypes* datastore,
8709 gsc_ParentChoice parents[static 2],
8710 gsc_GenoLocation putHere) {
8711 if (datastore->clones.inherit_names && datastore->clones.parent_name != NULL) {
8712 char* tmpname = gsc_malloc_wrap(sizeof(char)*(strlen(datastore->clones.parent_name) + 1),GSC_TRUE);
8713 strcpy(tmpname, datastore->clones.parent_name);
8714 gsc_set_name(putHere,tmpname);
8715 }
8716
8717 gsc_generate_clone(d, gsc_get_alleles(parents[0].loc), gsc_get_alleles(putHere));
8718}
8719
// Cloning entry point (signature's first line, the iterator creation line and the
// callback arguments of the scaffold call are not present in this listing).
// Stores `inherit_names` in the `.clones` member of the datastore and delegates to
// gsc_scaffold_make_new_genotypes. No recombination map is required.
8746 const gsc_GroupNum group,
8747 const _Bool inherit_names,
8748 gsc_GenOptions g) {
8749 /*int group_size = gsc_get_group_size( d, group);
8750 if (group_size < 1) {
8751 fprintf(stderr,"Group %d does not exist.\n", group.num);
8752 return GSC_NO_GROUP;
8753 }*/
8754
// Only inherit_names is initialised here; clones.parent_name is written by the
// chooser callback before the generator reads it, and only when inherit_names is set.
8755 union gsc_datastore_make_genotypes paramstore;
8756 paramstore.clones.inherit_names = inherit_names;
8757
8759
8760 return gsc_scaffold_make_new_genotypes(d, g, (void*) &parentit,
8761 &paramstore,
8764}
8765
8766
/* Builds every unordered pair (i, j), i < j, of members of `from_group` and
 * hands the resulting list of crosses to gsc_make_targeted_crosses, using
 * `mapID` for both parents.
 * NOTE(review): this listing is missing the line that carries the function
 * name and the parameter `d`, and the lines that create the `group_indexes`,
 * `combos0` and `combos1` buffers; confirm against the full file.
 *
 * Returns GSC_NO_GROUP if the group has fewer than two members; otherwise the
 * result of gsc_make_targeted_crosses.
 */
8784 const gsc_GroupNum from_group,
8785 const gsc_MapID mapID,
8786 const gsc_GenOptions g) {
8787 GSC_GLOBALX_T group_size = gsc_get_group_size( d, from_group );
8788 if (group_size < 2) {
// A size of exactly 1 is reported as "not enough members"; any other size
// below 2 is reported as a nonexistent group.
8789 if (group_size == 1) {
8790 fprintf(stderr,"Group %lu does not have enough members to perform crosses\n", (long unsigned int) from_group.num);
8791 } else {
8792 fprintf(stderr,"Group %lu does not exist\n", (long unsigned int) from_group.num);
8793 }
8794 return GSC_NO_GROUP;
8795 }
8797 gsc_get_group_indexes( d, from_group, group_size, group_indexes );
8798
8799 // number of crosses = number of entries in upper triangle of matrix
8800 // = half of (n entries in matrix - length of diagonal)
8801 // = half of (lmatrix * lmatrix - lmatrix);
8802 GSC_GLOBALX_T n_crosses = group_size * (group_size - 1) / 2; //* g.family_size;
8803
8806 GSC_GLOBALX_T* combinations[2] = {combos0, combos1};
8807 GSC_GLOBALX_T cross_index = 0;
// Enumerate the strict upper triangle: each pair appears once, first parent
// always earlier in group_indexes than the second.
8808 for (GSC_GLOBALX_T i = 0; i < group_size; ++i) {
8809 for (GSC_GLOBALX_T j = i + 1; j < group_size; ++j) {
8810 combinations[0][cross_index] = group_indexes[i];
8811 combinations[1][cross_index] = group_indexes[j];
8812
8813 ++cross_index;
8814 }
8815 }
8816
8817 GSC_DELETE_BUFFER(group_indexes);
8818 gsc_GroupNum out = gsc_make_targeted_crosses(d, n_crosses, combinations[0], combinations[1], mapID, mapID, g);
8819 GSC_DELETE_BUFFER(combos0);
8820 GSC_DELETE_BUFFER(combos1);
8821 return out;
8822}
8823
/* Deprecated entry point (named gsc_make_n_crosses_from_top_m_percent in its
 * own message): prints a deprecation notice to stderr and returns NO_GROUP
 * without using any of the visible parameters.
 * NOTE(review): the first line of the signature is missing from this listing.
 * NOTE(review): the two adjacent string literals are concatenated with no
 * space between them, so the output reads "...is deprecated.It behaved...".
 */
8825 const gsc_MapID mapID, const gsc_EffectID effID, const gsc_GenOptions g) {
8826 fprintf(stderr, "Function gsc_make_n_crosses_from_top_m_percent is deprecated."
8827 "It behaved unintuitively and goes against genomicSimulation principles on division of functionality\n");
8828
8829 return NO_GROUP;
8830}
8831
/* Reads pairs of parent names from `input_file` (one pair per row), resolves
 * each name to an index, and passes the resolvable pairs to
 * gsc_make_targeted_crosses with `map1` and `map2`.
 * NOTE(review): this listing is missing the line that carries the function
 * name and the parameter `d`, and the lines that create the `combos0` and
 * `combos1` buffers; confirm against the full file.
 *
 * Returns GSC_NO_GROUP if the file has no rows; otherwise the result of
 * gsc_make_targeted_crosses. Exits the process with status 1 if the file
 * cannot be opened.
 */
8859 const char* input_file,
8860 const gsc_MapID map1,
8861 const gsc_MapID map2,
8862 const gsc_GenOptions g) {
8863 struct gsc_TableSize t = gsc_get_file_dimensions(input_file, '\t');
8864 if (t.num_rows < 1) {
8865 fprintf(stderr, "No crosses exist in that file\n");
8866 return GSC_NO_GROUP;
8867 }
8868
8869 //open file
8870 FILE* fp;
8871 if ((fp = fopen(input_file, "r")) == NULL) {
8872 fprintf(stderr, "Failed to open file %s.\n", input_file); exit(1);
8873 }
8874
8877 GSC_GLOBALX_T* combinations[2] = {combos0,combos1};
8878 char buffer[2][NAME_LENGTH];
8879 // for each row in file
// bufferi counts the crosses kept so far; it advances only for rows whose
// parents were both found.
8880 GSC_GLOBALX_T bufferi = 0;
8881 for (int filei = 0; filei < t.num_rows; ++filei) {
8882 // read the names of the two parents of this cross
// NOTE(review): "%s" has no field width, so a name of NAME_LENGTH or more
// characters overruns buffer[]; the fscanf return value is not checked.
8883 fscanf(fp, "%s %s \n", buffer[0], buffer[1]);
8884 combinations[0][bufferi] = gsc_get_index_of_name(d->m, buffer[0]);
8885 combinations[1][bufferi] = gsc_get_index_of_name(d->m, buffer[1]);
// Assumes "not found" is reported as a negative index and that
// GSC_GLOBALX_T is a signed type - TODO confirm.
8886 if (combinations[0][bufferi] < 0 || combinations[1][bufferi] < 0) {
8887 fprintf(stderr, "Parents on file %s line %lu could not be found\n", input_file, (long unsigned int) filei);
8888 } else {
8889 ++bufferi;
8890 }
8891 }
8892
8893 fclose(fp);
8894 gsc_GroupNum out = gsc_make_targeted_crosses(d, bufferi, combinations[0], combinations[1], map1, map2, g);
8895 GSC_DELETE_BUFFER(combos0);
8896 GSC_DELETE_BUFFER(combos1);
8897 return out;
8898}
8899
/* Reads four grandparent names per row of `input_file`, finds two existing
 * children of those grandparents (trying the three possible pairings), and
 * passes the two children of each row to gsc_make_targeted_crosses.
 * NOTE(review): this listing is missing the line that carries the function
 * name and the parameter `d`, and the lines that create the `combos0` and
 * `combos1` buffers; confirm against the full file.
 *
 * Rows that cannot be resolved are kept in the list as a pair of
 * GSC_NA_GLOBALX placeholders, so `t.num_rows` entries are always passed on.
 *
 * Returns GSC_NO_GROUP if the file has no rows; otherwise the result of
 * gsc_make_targeted_crosses. Exits the process with status 1 if the file
 * cannot be opened.
 */
8933 const char* input_file,
8934 const gsc_MapID map1,
8935 const gsc_MapID map2,
8936 const gsc_GenOptions g) {
8937 struct gsc_TableSize t = gsc_get_file_dimensions(input_file, '\t');
8938 if (t.num_rows < 1) {
8939 fprintf(stderr, "No crosses exist in that file\n");
8940 return GSC_NO_GROUP;
8941 }
8942
8943 //open file
8944 FILE* fp;
8945 if ((fp = fopen(input_file, "r")) == NULL) {
8946 fprintf(stderr, "Failed to open file %s.\n", input_file); exit(1);
8947 }
8948
8951 GSC_GLOBALX_T* combinations[2] = {combos0,combos1};
8952 char buffer[4][NAME_LENGTH];
// View of buffer[] as an array of string pointers, as taken by gsc_get_ids_of_names.
8953 const char* to_buffer[] = {buffer[0], buffer[1], buffer[2], buffer[3]};
8954 gsc_PedigreeID g0_id[4];
8955 GSC_GLOBALX_T f1_i[2];
8956 // for each row in file
8957 for (GSC_GLOBALX_T i = 0; i < t.num_rows; ++i) {
8958 // load the four grandparents
// NOTE(review): "%s" has no field width, so a name of NAME_LENGTH or more
// characters overruns buffer[]; the fscanf return value is not checked.
8959 fscanf(fp, "%s %s %s %s \n", buffer[0], buffer[1], buffer[2], buffer[3]);
8960 gsc_get_ids_of_names(d->m, 4, to_buffer, g0_id);
8961 if (g0_id[0].id == GSC_NO_PEDIGREE.id || g0_id[1].id == GSC_NO_PEDIGREE.id || g0_id[2].id == GSC_NO_PEDIGREE.id || g0_id[3].id == GSC_NO_PEDIGREE.id) {
8962 fprintf(stderr, "Could not go ahead with the line %lu cross - g0 names not in records\n",
8963 (long unsigned int) i);
8964 combinations[0][i] = GSC_NA_GLOBALX;
8965 combinations[1][i] = GSC_NA_GLOBALX;
8966 continue;
8967 }
8968
8969 // identify two parents
// First pairing tried: (0,1) and (2,3). The `< 0` tests assume a negative
// "not found" result and a signed GSC_GLOBALX_T - TODO confirm.
8970 f1_i[0] = gsc_get_index_of_child(d->m, g0_id[0], g0_id[1]);
8971 f1_i[1] = gsc_get_index_of_child(d->m, g0_id[2], g0_id[3]);
8972 if (f1_i[0] < 0 || f1_i[1] < 0) {
8973 // try different permutations of the four grandparents.
// Second pairing: (0,2) and (1,3).
8974 f1_i[0] = gsc_get_index_of_child(d->m, g0_id[0], g0_id[2]);
8975 f1_i[1] = gsc_get_index_of_child(d->m, g0_id[1], g0_id[3]);
8976 if (f1_i[0] < 0 || f1_i[1] < 0) {
// Third and last pairing: (0,3) and (1,2).
8977 f1_i[0] = gsc_get_index_of_child(d->m, g0_id[0], g0_id[3]);
8978 f1_i[1] = gsc_get_index_of_child(d->m, g0_id[1], g0_id[2]);
8979 if (f1_i[0] < 0 || f1_i[1] < 0) {
8980 fprintf(stderr, "Could not go ahead with the line %lu cross - f1 children do not exist for this quartet\n",
8981 (long unsigned int) i);
8982 combinations[0][i] = GSC_NA_GLOBALX;
8983 combinations[1][i] = GSC_NA_GLOBALX;
8984 continue;
8985 }
8986 }
8987 }
8988
8989 //add them to a combinations list
8990 combinations[0][i] = f1_i[0];
8991 combinations[1][i] = f1_i[1];
8992
8993 }
8994
8995 fclose(fp);
8996 gsc_GroupNum out = gsc_make_targeted_crosses(d, t.num_rows, combinations[0], combinations[1],
8997 map1, map2, g);
8998 GSC_DELETE_BUFFER(combos0);
8999 GSC_DELETE_BUFFER(combos1);
9000 return out;
9001}
9002
9003
9004/*--------------------------------Fitness------------------------------------*/
9005
/* Moves the `top_n` members of `group` with the best breeding values (as
 * computed by gsc_calculate_bvs with `effID`) into a new group.
 * NOTE(review): the line carrying the function name and the parameter `d` is
 * missing from this listing.
 *
 * group     - group to select from.
 * effID     - effect set used to score the members.
 * top_n     - number of members to select.
 * lowIsBest - if true the lowest scores are selected, otherwise the highest.
 *
 * Returns GSC_NO_GROUP if the effect set has no usable rows, `d->m` is NULL,
 * or the group is empty; otherwise the result of gsc_make_group_from. When the
 * group has `top_n` members or fewer, all of them are moved without scoring.
 */
9021 const gsc_GroupNum group,
9022 const gsc_EffectID effID,
9023 const GSC_GLOBALX_T top_n,
9024 const _Bool lowIsBest) {
9025 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
9026 if (effIndex == GSC_NA_IDX || d->e[effIndex].effects.rows < 1 || d->m == NULL) {
9027 fprintf(stderr, "Either effect matrix or allele matrix does not exist\n");
9028 return GSC_NO_GROUP;
9029 }
9030
9031 GSC_GLOBALX_T group_size = gsc_get_group_size( d, group );
9032 if (group_size == 0) {
9033 fprintf(stderr,"Group %lu does not exist\n", (long unsigned int) group.num);
9034 return GSC_NO_GROUP;
9035 }
9036 GSC_CREATE_BUFFER(group_indexes,GSC_GLOBALX_T,group_size);
9037 gsc_get_group_indexes( d, group, group_size, group_indexes );
9038
9039 if (group_size <= top_n) {
9040 // well we'll just have to move em all
// NOTE(review): this path returns without GSC_DELETE_BUFFER(group_indexes);
// if the buffer macro can heap-allocate, this leaks - confirm.
9041 gsc_GroupNum migration = gsc_make_group_from(d, group_size, group_indexes);
9042 return migration;
9043 }
9044
9045 // This should be ordered the same as the indexes
9046 gsc_DecimalMatrix fits = gsc_calculate_bvs( d, group, effID ); // 1 by group_size matrix
9047
9048 // get an array of pointers to those fitnesses
9049 GSC_CREATE_BUFFER(p_fits,double*,fits.cols);
9050 for (size_t i = 0; i < fits.cols; i++) {
9051 p_fits[i] = &(fits.matrix[0][i]);
9052 }
9053
9054 // sort the pointers: ascending comparer when lowIsBest, descending comparer otherwise
9055 if (lowIsBest) {
9056 qsort(p_fits, fits.cols, sizeof(double*), gsc_helper_ascending_pdouble_comparer);
9057 } else {
9058 qsort(p_fits, fits.cols, sizeof(double*), gsc_helper_descending_pdouble_comparer);
9059 }
9060
9061 // save the indexes of the best n
9062 GSC_CREATE_BUFFER(top_individuals,GSC_GLOBALX_T,top_n);
9063 for (GSC_GLOBALX_T i = 0; i < top_n; i++) {
// Pointer difference from the start of row 0 recovers the member's original
// column, which is also its position in group_indexes.
9064 top_individuals[i] = group_indexes[p_fits[i] - fits.matrix[0]];
9065 }
9066 gsc_delete_dmatrix(&fits);
9067 GSC_DELETE_BUFFER(p_fits);
9068 GSC_DELETE_BUFFER(group_indexes);
9069
9070 // send those n to a new group
9071 gsc_GroupNum out = gsc_make_group_from(d, top_n, top_individuals);
9072 GSC_DELETE_BUFFER(top_individuals);
9073 return out;
9074}
9075
/* Validates the effect set identified by `effID` and returns a matrix `bvs`.
 * NOTE(review): this listing is missing the line that carries the function
 * name and the parameter `d`, and the three lines that create the iterator,
 * compute `bvs` and (presumably) release the iterator; confirm against the
 * full file.
 *
 * Returns an empty (0 by 0) matrix if the effect set cannot be found, has no
 * rows, or `d->m` is NULL.
 */
9094 const gsc_GroupNum group,
9095 const gsc_EffectID effID) {
9096 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
9097 if (effIndex == GSC_NA_IDX || d->e[effIndex].effects.rows < 1 || d->m == NULL) {
9098 fprintf(stderr, "Effect matrix does not exist\n");
9099 return gsc_generate_zero_dmatrix(0, 0);
9100 }
9101 gsc_EffectMatrix e = d->e[effIndex];
9102
9103 // casting away const but I promise not to use the iterator to change anything
9105
9107
9109 return bvs;
9110}
9111
/* Computes one breeding-value sum per genotype visited by the iterator
 * `targets`, using the rows of `effset` as per-allele marker effects.
 * NOTE(review): this listing is missing the line that carries the function
 * name and the parameter `targets`, the line that initialises `loc`, and the
 * first line of the paired accumulate call inside the main loop; confirm
 * against the full file.
 *
 * Returns a 1 by n_genotypes matrix of sums, or an empty (0 by 0) matrix if
 * either argument is NULL.
 */
9124 const gsc_EffectMatrix* effset) {
9125 if (targets == NULL || effset == NULL) {
9126 fprintf(stderr, "Either targets or marker effects were not provided\n");
9127 return gsc_generate_zero_dmatrix(0, 0);
9128 }
9129
// Gather the allele strings of every target into a growable buffer.
// `genotypescap` is presumably declared by GSC_CREATE_BUFFER - TODO confirm.
9130 GSC_CREATE_BUFFER(genotypes, char*, 50);
9131 GSC_GLOBALX_T n_genotypes = 0;
9133 while (IS_VALID_LOCATION(loc)) {
9134 if (n_genotypes >= genotypescap) {
9135 GSC_STRETCH_BUFFER(genotypes, 2*n_genotypes);
9136 }
9137 genotypes[n_genotypes] = gsc_get_alleles(loc);
9138 ++n_genotypes;
9139
9140 loc = gsc_next_forwards(targets);
9141 }
9142
9143 GSC_GENOLEN_T n_markers = effset->effects.cols;
9144 gsc_DecimalMatrix sum = gsc_generate_zero_dmatrix(1, n_genotypes);
9145 gsc_DecimalMatrix counts = gsc_generate_zero_dmatrix(n_genotypes, n_markers );
9146 gsc_DecimalMatrix counts2 = gsc_generate_zero_dmatrix(n_genotypes, n_markers );
9147
9148 GSC_GENOLEN_T i = 0; // highest allele index
9149
// Effect rows are consumed two at a time so that one pass over the genotypes
// counts two alleles.
// NOTE(review): if `effects.rows` is unsigned and 0, `rows - 1` wraps - confirm
// callers guarantee at least one row.
9150 for (; i < effset->effects.rows - 1; i += 2) {
9151 // get the allele counts in counts
9152 gsc_calculate_utility_allele_counts_pair(n_markers, n_genotypes, (const char**) genotypes,
9153 effset->effect_names[i], &counts, effset->effect_names[i+1], &counts2);
9154
9155 // multiply counts with effects and add to bv sum
9157 &counts2, effset->effects.matrix[i+1]);
9158 }
9159 if (i < effset->effects.rows) { // deal with the last odd-numbered allele
9160 gsc_calculate_utility_allele_counts(n_markers, n_genotypes, (const char**) genotypes,
9161 effset->effect_names[i], &counts);
9162 gsc_add_matrixvector_product_to_dmatrix(&sum, &counts, effset->effects.matrix[i]);
9163 }
9164
9165 GSC_DELETE_BUFFER(genotypes);
9166 gsc_delete_dmatrix(&counts);
9167 gsc_delete_dmatrix(&counts2);
9168
9169 return sum;
9170}
9171
/* Builds an n_genotypes by n_markers matrix holding, for each genotype visited
 * by the iterator and each marker, how many copies of `allele` it carries.
 * NOTE(review): this listing is missing the line that carries the function
 * name and the parameter `d`, the lines that create the iterator `it` and
 * initialise `loc`, and one line after the loop (presumably releasing the
 * iterator); confirm against the full file.
 */
9189 const gsc_GroupNum group,
9190 const char allele) {
9191 // To upgrade this to use iterators instead, we'll need some modifications to DecimalMatrix. Future work.
9192 GSC_CREATE_BUFFER(genotypes, char*, 50);
9193 GSC_GLOBALX_T n_genotypes = 0;
9194 // casting away const but I promise not to use the iterator to change anything
9196
// Gather allele strings into a growable buffer; `genotypescap` is presumably
// declared by GSC_CREATE_BUFFER - TODO confirm.
9198 while (IS_VALID_LOCATION(loc)) {
9199 if (n_genotypes >= genotypescap) {
9200 GSC_STRETCH_BUFFER(genotypes, 2*n_genotypes);
9201 }
9202 genotypes[n_genotypes] = gsc_get_alleles(loc);
9203 ++n_genotypes;
9204
9205 loc = gsc_next_forwards(&it);
9206 }
9208
9209 GSC_GENOLEN_T n_markers = d->m->n_markers;
9210 gsc_DecimalMatrix counts = gsc_generate_zero_dmatrix(n_genotypes, n_markers );
9211
9212 gsc_calculate_utility_allele_counts(n_markers, n_genotypes, (const char** const) genotypes,
9213 allele, &counts);
9214
9215 GSC_DELETE_BUFFER(genotypes);
9216 return counts;
9217}
9218
/* Fills `counts` so that cell [i][j] holds the number of copies (0, 1 or 2)
 * of `allele` found in the two allele slots of marker j of genotype i.
 * NOTE(review): the line carrying the function name and the parameter
 * `n_markers` is missing from this listing.
 *
 * n_genotypes - number of entries in `genotypes`.
 * genotypes   - allele strings; slots 2*j and 2*j+1 belong to marker j.
 *               NULL entries are skipped and their rows left untouched.
 * allele      - character to count.
 * counts      - output; must have at least n_genotypes rows and n_markers
 *               columns, otherwise an error is printed and nothing is written.
 */
9233 const GSC_GLOBALX_T n_genotypes,
9234 const char** const genotypes,
9235 const char allele,
9236 gsc_DecimalMatrix* counts) {
9237 if (genotypes == NULL || counts == NULL ||
9238 counts->rows < n_genotypes ||
9239 counts->cols < n_markers) {
9240 fprintf(stderr, "Inputs for calculating count matrix are improperly sized: calculation cannot proceed\n"); return;
9241 }
9242
9243 for (GSC_GLOBALX_T i = 0; i < n_genotypes; ++i) {
9244 //RPACKINSERT R_CheckUserInterrupt();
9245 if (genotypes[i] == NULL) {
9246 continue;
9247 }
9248
9249 for (GSC_GENOLEN_T j = 0; j < n_markers; ++j) {
9250 int cell_sum = 0;
9251 if (genotypes[i][2*j] == allele) { ++cell_sum; }
9252 if (genotypes[i][2*j + 1] == allele) { ++cell_sum; }
9253 counts->matrix[i][j] = cell_sum;
9254 }
9255 }
9256}
9257
/* Two-allele variant of the count routine: in a single pass fills `counts`
 * with the per-marker copy number of `allele` and `counts2` with that of
 * `allele2`.
 * NOTE(review): the line carrying the function name and the parameter
 * `n_markers` is missing from this listing.
 *
 * Each allele slot is tested against `allele` first and against `allele2`
 * only if that fails, so if the two characters are equal `counts2` receives
 * zeros. NULL entries of `genotypes` are skipped and their rows left
 * untouched. If either output matrix is NULL or too small, an error is
 * printed and nothing is written.
 */
9281 const GSC_GLOBALX_T n_genotypes,
9282 const char** const genotypes,
9283 const char allele,
9284 gsc_DecimalMatrix* counts,
9285 const char allele2,
9286 gsc_DecimalMatrix* counts2) {
9287 if (genotypes == NULL || counts == NULL || counts2 == NULL ||
9288 counts->rows < n_genotypes || counts2->rows < n_genotypes ||
9289 counts->cols < n_markers || counts2->cols < n_markers) {
9290 fprintf(stderr, "Inputs for calculating count matrix are improperly sized: calculation cannot proceed\n"); return;
9291 }
9292
9293 for (GSC_GLOBALX_T i = 0; i < n_genotypes; ++i) {
9294 //RPACKINSERT R_CheckUserInterrupt();
9295 if (genotypes[i] == NULL) {
9296 continue;
9297 }
9298
9299 for (GSC_GENOLEN_T j = 0; j < n_markers; ++j) {
9300 int cell_sum = 0;
9301 int cell_sum2 = 0;
9302 if (genotypes[i][2*j] == allele) { ++cell_sum; }
9303 else if (genotypes[i][2*j] == allele2) { ++cell_sum2;}
9304 if (genotypes[i][2*j + 1] == allele) { ++cell_sum; }
9305 else if (genotypes[i][2*j + 1] == allele2) { ++cell_sum2;}
9306 counts->matrix[i][j] = cell_sum;
9307 counts2->matrix[i][j] = cell_sum2;
9308 }
9309 }
9310}
9311
9312
/* Divides the markers of every chromosome of a recombination map into `n`
 * blocks, producing n * n_chr blocks in total; block b of chromosome chr is
 * stored at index chr*n + b.
 * NOTE(review): this listing is missing the line that carries the function
 * name and the parameter `d`, and the line that (presumably) allocates
 * `blocks.num_markers_in_block`; confirm against the full file.
 *
 * mapid - map to use; NO_MAP, or an identifier that resolves to an index
 *         outside the loaded maps, falls back to map index 0.
 * n     - number of blocks per chromosome; must be at least 1.
 *
 * Returns a gsc_MarkerBlocks with num_blocks == 0 if no map is loaded or
 * n < 1. Markers are accumulated in a temporary buffer and copied into an
 * exactly-sized array each time a block is closed.
 */
9341 const gsc_MapID mapid,
9342 const GSC_ID_T n) {
9343 gsc_MarkerBlocks blocks;
9344 blocks.num_blocks = 0;
9345
9346 if (d->genome.n_maps < 1) {
9347 fprintf(stderr,"Creating blocks by chromosome length requires at least one recombination map loaded\n");
9348 return blocks;
9349 }
9350 GSC_ID_T mapix = 0;
9351 if (mapid.id != NO_MAP.id) { mapix = gsc_get_index_of_map(d, mapid); }
9352 if (mapix >= d->genome.n_maps) {
9353 fprintf(stderr,"We don't have that recombination maps loaded. Using default map\n");
9354 mapix = 0;
9355 }
9356 gsc_RecombinationMap map = d->genome.maps[mapix];
9357
9358 if (n < 1) {
9359 fprintf(stderr,"Invalid n value: number of blocks must be positive\n");
9360 return blocks;
9361 }
// NOTE(review): unlike the checks above, this one only prints; execution
// continues with num_blocks == 0 and a zero-sized allocation request.
9362 if (map.n_chr < 1) {
9363 fprintf(stderr,"Map has no chromosomes, so it cannot be divided into blocks\n");
9364 }
9365
9366 blocks.num_blocks = n * map.n_chr;
9368 blocks.markers_in_block = gsc_malloc_wrap(sizeof(*blocks.markers_in_block) * blocks.num_blocks,GSC_TRUE);
9369 for (GSC_ID_T i = 0; i < blocks.num_blocks; ++i) {
9370 blocks.num_markers_in_block[i] = 0;
9371 blocks.markers_in_block[i] = NULL;
9372 }
9373
// Scratch list of the markers of the block currently being filled; bi is the
// next free position in it.
9374 GSC_CREATE_BUFFER(temp_markers_in_block, GSC_GENOLEN_T, 128);
9375 GSC_GENOLEN_T bi = 0;
9376
9377 for (GSC_GENOLEN_T chr = 0; chr < map.n_chr; ++chr) {
9378 size_t current_block_filling = 0; //counter of how many blocks we have in this chr so far
9379 double chrpos = 0;
9380 bi = 0;
9381
9382 // loop through each marker in this chromosome
9383 switch (map.chrs[chr].type) {
9384 case GSC_LINKAGEGROUP_SIMPLE:
// A single-marker chromosome puts its one marker in the first block.
9385 if (map.chrs[chr].map.simple.n_markers == 1) {
9386 GSC_ID_T b = chr*n + 0;
9387 blocks.markers_in_block[b] = gsc_malloc_wrap(sizeof(*blocks.markers_in_block[b]), GSC_TRUE);
9388 blocks.markers_in_block[b][0] = map.chrs[chr].map.simple.first_marker_index;
9389 ++(blocks.num_markers_in_block[b]);
9390 } else {
9391 for (GSC_GENOLEN_T i = 0; i < map.chrs[chr].map.simple.n_markers; ++i) {
9392 //RPACKINSERT R_CheckUserInterrupt();
9393 chrpos += map.chrs[chr].map.simple.dists[i];
// Close finished blocks until the current block's boundary is ahead of chrpos.
// NOTE(review): if GSC_ID_T is an integer type, `current_block_filling / n`
// is integer division and is 0 whenever current_block_filling < n, so the
// boundary never moves - confirm whether a floating-point ratio was intended.
9394 while (current_block_filling < n - 1 && chrpos > current_block_filling / n) {
9395 GSC_ID_T b = chr*n + current_block_filling;
9396 if (blocks.num_markers_in_block[b] > 0) {
// bcapacity is a size in bytes, not an element count.
9397 GSC_GENOLEN_T bcapacity = sizeof(*blocks.markers_in_block[b])*blocks.num_markers_in_block[b];
9398 blocks.markers_in_block[b] = gsc_malloc_wrap(bcapacity, GSC_TRUE);
9399 memcpy(blocks.markers_in_block[b],temp_markers_in_block,bcapacity);
9400 }
9401
9402 ++current_block_filling;
9403 bi = 0;
9404 }
9405
9406 // save marker
9407 if (bi >= temp_markers_in_blockcap) {
9408 GSC_STRETCH_BUFFER(temp_markers_in_block,2*bi);
9409 }
9410 temp_markers_in_block[bi] = map.chrs[chr].map.simple.first_marker_index + i;
9411 ++(blocks.num_markers_in_block[chr*n + current_block_filling]);
9412 ++bi;
9413
9414 }
9415
// Flush the block that was still open when the chromosome ended.
9416 GSC_ID_T b = chr*n + current_block_filling;
9417 if (blocks.num_markers_in_block[b] > 0) {
9418 GSC_GENOLEN_T bcapacity = sizeof(*blocks.markers_in_block[b])*blocks.num_markers_in_block[b];
9419 blocks.markers_in_block[b] = gsc_malloc_wrap(bcapacity, GSC_TRUE);
9420 memcpy(blocks.markers_in_block[b],temp_markers_in_block,bcapacity);
9421 }
9422 }
9423 break;
9424
9425 case GSC_LINKAGEGROUP_REORDER:
// NOTE(review): this test reads the `simple` member although this is the
// REORDER case; the rest of the branch uses `reorder` - confirm intended.
9426 if (map.chrs[chr].map.simple.n_markers == 1) {
9427 GSC_ID_T b = chr*n + 0;
9428 blocks.markers_in_block[b] = gsc_malloc_wrap(sizeof(*blocks.markers_in_block[b]), GSC_TRUE);
9429 blocks.markers_in_block[b][0] = map.chrs[chr].map.reorder.marker_indexes[0];
9430 ++(blocks.num_markers_in_block[b]);
9431 } else {
9432 for (GSC_GENOLEN_T i = 0; i < map.chrs[chr].map.reorder.n_markers; ++i) {
9433 //RPACKINSERT R_CheckUserInterrupt();
9434 chrpos += map.chrs[chr].map.reorder.dists[i];
// Same block-closing logic (and the same division caveat) as the SIMPLE case.
9435 while (current_block_filling < n - 1 && chrpos > current_block_filling / n) {
9436 GSC_ID_T b = chr*n + current_block_filling;
9437 if (blocks.num_markers_in_block[b] > 0) {
9438 GSC_GENOLEN_T bcapacity = sizeof(*blocks.markers_in_block[b])*blocks.num_markers_in_block[b];
9439 blocks.markers_in_block[b] = gsc_malloc_wrap(bcapacity, GSC_TRUE);
9440 memcpy(blocks.markers_in_block[b],temp_markers_in_block,bcapacity);
9441 }
9442
9443 ++current_block_filling;
9444 bi = 0;
9445 }
9446
9447 // save marker
9448 if (bi >= temp_markers_in_blockcap) {
9449 GSC_STRETCH_BUFFER(temp_markers_in_block,2*bi);
9450 }
// Here marker indexes come from an explicit list rather than first index + offset.
9451 temp_markers_in_block[bi] = map.chrs[chr].map.reorder.marker_indexes[i];
9452 ++(blocks.num_markers_in_block[chr*n + current_block_filling]);
9453 ++bi;
9454
9455 }
9456
// Flush the block that was still open when the chromosome ended.
9457 GSC_ID_T b = chr*n + current_block_filling;
9458 if (blocks.num_markers_in_block[b] > 0) {
9459 GSC_GENOLEN_T bcapacity = sizeof(*blocks.markers_in_block[b])*blocks.num_markers_in_block[b];
9460 blocks.markers_in_block[b] = gsc_malloc_wrap(bcapacity, GSC_TRUE);
9461 memcpy(blocks.markers_in_block[b],temp_markers_in_block,bcapacity);
9462 }
9463 }
9464 break;
9465 }
9466
9467 }
9468
9469 GSC_DELETE_BUFFER(temp_markers_in_block);
9470
9471 return blocks;
9472}
9473
/* Loads marker blocks from a text file. The first line of the file is
 * skipped; on every following line four leading fields are discarded and the
 * rest is read as marker names, each terminated by ';'. Names that resolve to
 * a known marker are stored in that line's block; others are dropped.
 * NOTE(review): two lines following the num_blocks assignment (presumably the
 * allocations of `blocks.num_markers_in_block` and `blocks.markers_in_block`)
 * are missing from this listing; confirm against the full file.
 *
 * d          - simulation data; its genome is used to look up marker names.
 * block_file - path of the file to read.
 *
 * Returns the populated gsc_MarkerBlocks, with num_blocks set to the file's
 * row count minus one. Exits the process with status 1 if the file cannot be
 * opened.
 */
9495gsc_MarkerBlocks gsc_load_blocks(const gsc_SimData* d, const char* block_file) {
9496 struct gsc_TableSize ts = gsc_get_file_dimensions(block_file, '\t');
9497
9498 gsc_MarkerBlocks blocks;
// One row is the header line, which is skipped below.
9499 blocks.num_blocks = ts.num_rows - 1;
9502
9503 FILE* infile;
9504 if ((infile = fopen(block_file, "r")) == NULL) {
9505 fprintf(stderr, "Failed to open file %s.\n", block_file); exit(1);
9506 //return blocks;
9507 }
9508
9509 GSC_GENOLEN_T bufferlen = d->genome.n_markers;
9510 GSC_CREATE_BUFFER(markername,char,CONTIG_WIDTH);
9511 GSC_CREATE_BUFFER(markerbuffer,GSC_GENOLEN_T,bufferlen);
9512 GSC_ID_T bi = 0; // block number
9513
9514 // Ignore the first line
9515 fscanf(infile, "%*[^\n]\n");
9516
9517 // Loop through rows of the file (each row corresponds to a block)
// The four suppressed conversions discard the leading columns of the row.
// NOTE(review): bi is never checked against blocks.num_blocks.
9518 while (fscanf(infile, "%*d %*f %*s %*s ") != EOF) {
9519 //for (int bi = 0; bi < n_blocks; ++bi) {
9520
9521 // Indexes in play:
9522 // bi: index in the blocks struct's arrays of the current block/line in the file
9523 // ni: number of characters so far in the name of the next marker being read from the file
9524 // mi: number of markers that have so far been read from the file for this block
9525 blocks.num_markers_in_block[bi] = 0;
9526 int c;
9527 size_t ni = 0;
9528 GSC_GENOLEN_T mi = 0;
9529
9530 memset(markerbuffer, 0, sizeof(*markerbuffer) * bufferlen);
9531 while ((c = fgetc(infile)) != EOF && c !='\n') {
9532 if (c == ';') {
9533 markername[ni] = '\0';
9534
9535 // identify the index of this marker and save it in the temporary marker buffer `markerbuffer`
// NOTE(review): mi is not checked against bufferlen.
9536 GSC_GENOLEN_T markerindex;
9537 if (gsc_get_index_of_genetic_marker(markername, d->genome, &markerindex)) {
9538 ++(blocks.num_markers_in_block[bi]);
9539 markerbuffer[mi] = markerindex;
9540 ++mi;
9541 }
9542
9543 ni = 0;
9544 } else {
// NOTE(review): ni is not checked against the CONTIG_WIDTH size of markername.
9545 markername[ni] = c;
9546 ++ni;
9547 }
9548 }
9549
9550 // copy the markers belonging to this block into the struct
// When mi is 0 the allocation wrapper prints a "0 memory allocation
// requested" message and returns NULL.
9551 blocks.markers_in_block[bi] = gsc_malloc_wrap(sizeof(GSC_GENOLEN_T) * mi,GSC_TRUE);
9552 for (GSC_GENOLEN_T i = 0; i < mi; ++i) {
9553 blocks.markers_in_block[bi][i] = markerbuffer[i];
9554 }
9555
9556 ++bi;
9557 }
9558
9559 GSC_DELETE_BUFFER(markerbuffer);
9560 GSC_DELETE_BUFFER(markername);
9561 fclose(infile);
9562 return blocks;
9563}
9564
/* Writes, for every member of `group`, two lines to `output_file`: one per
 * haplotype. Each line starts with a label ("<name>_1" / "<name>_2", or the
 * member's id in place of the name when it has none) followed by one
 * space-separated value per block: the sum of the effects of the alleles that
 * haplotype carries at the block's markers.
 * NOTE(review): this listing is missing the line that carries the function
 * name and the parameter `d`, and the line that creates the `gids` buffer;
 * confirm against the full file.
 *
 * b           - marker blocks to score.
 * effID       - effect set to use; an unknown id prints an error and returns.
 * output_file - path to write; the process exits with status 1 if it cannot
 *               be opened.
 * group       - group whose members are scored.
 */
9594 const gsc_MarkerBlocks b,
9595 const gsc_EffectID effID,
9596 const char* output_file,
9597 const gsc_GroupNum group) {
9598 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
9599 if (effIndex == GSC_NA_IDX) {
9600 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effID.id);
9601 return;
9602 }
9603 gsc_EffectMatrix e = d->e[effIndex];
9604
9605 FILE* outfile;
9606 if ((outfile = fopen(output_file, "w")) == NULL) {
9607 fprintf(stderr, "Failed to open file %s.\n", output_file); exit(1);
9608 }
9609
// Scratch space for row labels.
// NOTE(review): the sprintf calls below do not bound the name length against
// CONTIG_WIDTH.
9610 GSC_CREATE_BUFFER(buffer,char,CONTIG_WIDTH);
9611
9612 GSC_GLOBALX_T gsize = gsc_get_group_size(d, group);
9613 GSC_CREATE_BUFFER(ggenos,char*,gsize);
9614 GSC_CREATE_BUFFER(gnames,char*,gsize);
9615 gsc_get_group_genes(d, group, gsize, ggenos);
9616 gsc_get_group_names(d, group, gsize, gnames);
9618 gsc_get_group_ids(d,group,gsize,gids);
9619
9620 double beffect;
9621
9622 // for each group member
9623 for (GSC_GLOBALX_T i = 0; i < gsize; ++i) {
9624 // write the row label for the first haplotype
9625 if (gnames[i] != NULL) {
9626 sprintf(buffer, "%s_1", gnames[i]);
9627 } else {
9628 sprintf(buffer, "%lu_1", (long unsigned int) gids[i].id);
9629 }
9630 fwrite(buffer, sizeof(char), strlen(buffer), outfile);
9631
9632 // for each block
9633 for (GSC_ID_T j = 0; j < b.num_blocks; ++j) {
9634 beffect = 0;
9635
9636 // calculate the local BV
// Even slot (2 * marker) is the first haplotype's allele at that marker.
9637 for (GSC_GENOLEN_T k = 0; k < b.num_markers_in_block[j]; ++k) {
9638 for (int q = 0; q < e.effects.rows; ++q) {
9639 if (ggenos[i][2 * b.markers_in_block[j][k]] == e.effect_names[q]) {
9640 beffect += e.effects.matrix[q][b.markers_in_block[j][k]];
9641 }
9642 }
9643 }
9644
9645 // print the local BV
9646 fprintf(outfile, " %lf", beffect);
9647 fflush(outfile);
9648 }
9649
// The leading "\n" in these labels ends the first haplotype's line.
9650 if (gnames[i] != NULL) {
9651 sprintf(buffer, "\n%s_2", gnames[i]);
9652 } else {
9653 sprintf(buffer, "\n%lu_2", (long unsigned int) gids[i].id);
9654 }
9655 fwrite(buffer, sizeof(char), strlen(buffer), outfile);
9656
9657 // for each block for the second haplotype
9658 for (GSC_ID_T j = 0; j < b.num_blocks; ++j) {
9659 beffect = 0;
9660 // calculate the local BV
// Odd slot (2 * marker + 1) is the second haplotype's allele.
9661 for (GSC_GENOLEN_T k = 0; k < b.num_markers_in_block[j]; ++k) {
9662 for (int q = 0; q < e.effects.rows; ++q) {
9663 if (ggenos[i][2 * b.markers_in_block[j][k] + 1] == e.effect_names[q]) {
9664 beffect += e.effects.matrix[q][b.markers_in_block[j][k]];
9665 }
9666 }
9667 }
9668
9669 // print the local BV
9670 fprintf(outfile, " %lf", beffect);
9671 fflush(outfile);
9672 }
9673 fwrite("\n", sizeof(char), 1, outfile);
9674 }
9675
9676 GSC_DELETE_BUFFER(ggenos);
9677 GSC_DELETE_BUFFER(gnames);
9678 GSC_DELETE_BUFFER(gids);
9679 GSC_DELETE_BUFFER(buffer);
9680 fflush(outfile);
9681 fclose(outfile);
9682}
9683
/* Writes per-block, per-haplotype effect sums for every genotype stored in the
 * chain of allele matrices starting at `d->m`. The output format matches the
 * group variant: two lines per genotype, labelled "<name>_1" / "<name>_2" (or
 * the genotype's id when it has no name), each followed by one
 * space-separated value per block.
 * NOTE(review): the line carrying the function name and the parameter `d` is
 * missing from this listing.
 *
 * b           - marker blocks to score.
 * effID       - effect set to use; an unknown id prints an error and returns.
 * output_file - path to write; the process exits with status 1 if it cannot
 *               be opened.
 */
9711 const gsc_MarkerBlocks b,
9712 const gsc_EffectID effID,
9713 const char* output_file) {
9714 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
9715 if (effIndex == GSC_NA_IDX) {
9716 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effID.id);
9717 return;
9718 }
9719 gsc_EffectMatrix e = d->e[effIndex];
9720
9721 FILE* outfile;
9722 if ((outfile = fopen(output_file, "w")) == NULL) {
9723 fprintf(stderr, "Failed to open file %s.\n", output_file); exit(1);
9724 }
9725
// Scratch space for row labels.
// NOTE(review): the sprintf calls below do not bound the name length against
// CONTIG_WIDTH.
9726 GSC_CREATE_BUFFER(buffer,char,CONTIG_WIDTH);
9727
9728 double beffect;
9729
9730 // walk the linked list of allele matrices, visiting every genotype in each
9731 AlleleMatrix* m = d->m;
// total_i is incremented alongside i but is not otherwise read in this function.
9732 GSC_GLOBALX_T total_i = 0;
9733 do {
9734 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i, ++total_i) {
9735 // write the row label for the first haplotype
9736 if (m->names[i] != NULL) {
9737 sprintf(buffer, "%s_1", m->names[i]);
9738 } else {
9739 sprintf(buffer, "%lu_1", (long unsigned int) m->ids[i].id);
9740 }
9741 fwrite(buffer, sizeof(char), strlen(buffer), outfile);
9742
9743 // for each block
9744 for (GSC_ID_T j = 0; j < b.num_blocks; ++j) {
9745 beffect = 0;
9746
9747 // calculate the local BV
// Even slot (2 * marker) is the first haplotype's allele at that marker.
9748 for (GSC_GENOLEN_T k = 0; k < b.num_markers_in_block[j]; ++k) {
9749 for (int q = 0; q < e.effects.rows; ++q) {
9750 if (m->alleles[i][2 * b.markers_in_block[j][k]] == e.effect_names[q]) {
9751 beffect += e.effects.matrix[q][b.markers_in_block[j][k]];
9752 }
9753 }
9754 }
9755
9756 // print the local BV
9757 fprintf(outfile, " %lf", beffect);
9758 fflush(outfile);
9759 }
9760
// The leading "\n" in these labels ends the first haplotype's line.
9761 if (m->names[i] != NULL) {
9762 sprintf(buffer, "\n%s_2", m->names[i]);
9763 } else {
9764 sprintf(buffer, "\n%lu_2", (long unsigned int) m->ids[i].id);
9765 }
9766 fwrite(buffer, sizeof(char), strlen(buffer), outfile);
9767
9768 // for each block for the second haplotype
9769 for (GSC_ID_T j = 0; j < b.num_blocks; ++j) {
9770 beffect = 0;
9771 // calculate the local BV
// Odd slot (2 * marker + 1) is the second haplotype's allele.
9772 for (GSC_GENOLEN_T k = 0; k < b.num_markers_in_block[j]; ++k) {
9773 for (int q = 0; q < e.effects.rows; ++q) {
9774 if (m->alleles[i][2 * b.markers_in_block[j][k] + 1] == e.effect_names[q]) {
9775 beffect += e.effects.matrix[q][b.markers_in_block[j][k]];
9776 }
9777 }
9778 }
9779
9780 // print the local BV
9781 fprintf(outfile, " %lf", beffect);
9782 fflush(outfile);
9783 }
9784 fwrite("\n", sizeof(char), 1, outfile);
9785 }
9786 } while ((m = m->next) != NULL);
9787
9788 GSC_DELETE_BUFFER(buffer);
9789 fflush(outfile);
9790 fclose(outfile);
9791}
9792
/* Builds a string holding, for each marker, the allele with the largest effect
 * in the chosen effect set.
 * NOTE(review): the signature line (function name and the parameters `d` and
 * `effID`) is missing from this listing.
 *
 * Returns a newly allocated, NUL-terminated string of length
 * d->genome.n_markers, or NULL if the effect set does not exist. The caller is
 * presumably responsible for releasing it - TODO confirm.
 * When several alleles tie, the one in the lowest effect row wins because the
 * comparison is strict.
 */
9810 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
9811 if (effIndex == GSC_NA_IDX) {
9812 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effID.id);
9813 return NULL;
9814 }
9815 gsc_EffectMatrix e = d->e[effIndex];
9816
// +1 for the terminating NUL written after the loop.
9817 char* optimal = gsc_malloc_wrap(sizeof(*optimal)* (d->genome.n_markers + 1),GSC_TRUE);
9818
9819 for (GSC_GENOLEN_T i = 0; i < d->genome.n_markers; ++i) {
// Start from row 0 and keep the running maximum over the remaining rows.
9820 char best_allele = e.effect_names[0];
9821 double best_score = e.effects.matrix[0][i];
9822 for (int a = 1; a < e.effects.rows; ++a) {
9823 if (e.effects.matrix[a][i] > best_score) {
9824 best_score = e.effects.matrix[a][i];
9825 best_allele = e.effect_names[a];
9826 }
9827 }
9828 optimal[i] = best_allele;
9829 }
9830 optimal[d->genome.n_markers] = '\0';
9831 return optimal;
9832}
9833
9834
/* Builds a string holding, for each marker, the highest-effect allele among
 * those actually carried by members of `group`.
 * NOTE(review): the line carrying the function name and the parameter `d` is
 * missing from this listing.
 *
 * Returns a newly allocated, NUL-terminated string of length
 * d->genome.n_markers, or NULL if the effect set or the group does not exist.
 * The caller is presumably responsible for releasing it - TODO confirm.
 * A marker at which no carried allele appears in the effect set is written as
 * '\0', which ends the string early for C string functions.
 */
9851 const gsc_GroupNum group,
9852 const gsc_EffectID effID) {
9853 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
9854 if (effIndex == GSC_NA_IDX) {
9855 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effID.id);
9856 return NULL;
9857 }
9858 gsc_EffectMatrix e = d->e[effIndex];
9859 // assumes no alleles in the matrix are spaces.
9860
9861 GSC_GLOBALX_T gsize = gsc_get_group_size(d, group);
9862 if (gsize == 0) {
9863 fprintf(stderr,"Nonexistent group %lu\n", (long unsigned int) group.num);
9864 return NULL;
9865 }
9866 GSC_CREATE_BUFFER(ggenes,char*,gsize);
9867 gsc_get_group_genes(d, group, gsize, ggenes);
9868
9869 char* optimal = gsc_malloc_wrap(sizeof(*optimal)* (d->genome.n_markers + 1),GSC_TRUE);
9870
9871 // for each locus
9872 for (GSC_GENOLEN_T j = 0; j < d->genome.n_markers; ++j) {
// '\0' doubles as "no allele chosen yet". best_score is deliberately left
// unset: the `best_allele == '\0' ||` test below short-circuits before it is
// read for the first time.
9873 char best_allele = '\0';
9874 double best_score;
9875 for (GSC_GLOBALX_T i = 0; i < gsize; ++i) {
9876
9877 // If the allele is different to the previous best (guaranteed if best_allele is not initialised)
9878 if (ggenes[i][2*j] != best_allele) {
9879 // Find it and see if it scores better.
9880 for (int a = 0; a < e.effects.rows; ++a) {
9881
9882 if (e.effect_names[a] == ggenes[i][2*j] &&
9883 (best_allele == '\0' || e.effects.matrix[a][j] > best_score)) { // if it scores better than current best
9884
9885 best_allele = ggenes[i][2*j];
9886 best_score = e.effects.matrix[a][j];
9887
9888 break;
9889 }
9890
9891 }
9892 }
9893
9894 // Repeat for second allele of the group member at that locus
9895 if (ggenes[i][2*j + 1] != best_allele) {
9896 // Find it and see if it scores better.
9897 for (int a = 0; a < e.effects.rows; ++a) {
9898
9899 if (e.effect_names[a] == ggenes[i][2*j + 1] &&
9900 (best_allele == '\0' || e.effects.matrix[a][j] > best_score)) { // if it scores better than current best
9901
9902 best_allele = ggenes[i][2*j + 1];
9903 best_score = e.effects.matrix[a][j];
9904
9905 break;
9906 }
9907
9908 }
9909 }
9910 }
9911 optimal[j] = best_allele;
9912 }
9913
9914 GSC_DELETE_BUFFER(ggenes);
9915 optimal[d->genome.n_markers] = '\0';
9916 return optimal;
9917}
9918
9919
/* Computes the largest score attainable under the chosen effect set: for each
 * marker the maximum effect over all effect rows, counted twice (once per
 * allele slot), summed over all markers.
 * NOTE(review): the signature line (function name and the parameters `d` and
 * `effID`) is missing from this listing.
 *
 * Returns that sum, or 0 if the effect set does not exist.
 */
9932 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
9933 if (effIndex == GSC_NA_IDX) {
9934 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effID.id);
9935 return 0;
9936 }
9937 gsc_EffectMatrix e = d->e[effIndex];
9938
9939 double best_gebv = 0;
9940
9941 for (GSC_GENOLEN_T i = 0; i < d->genome.n_markers; ++i) {
9942 // Find the allele with the highest effect
9943 double best_score = e.effects.matrix[0][i];
9944 for (int a = 1; a < e.effects.rows; ++a) {
9945 if (e.effects.matrix[a][i] > best_score) {
9946 best_score = e.effects.matrix[a][i];
9947 }
9948 }
9949
9950 // add that highest allele to the score twice over
9951 best_gebv += (2*best_score);
9952 }
9953
9954 return best_gebv;
9955}
9956
/* Computes the largest score attainable using only alleles carried by members
 * of `group`: for each marker the highest effect among the carried alleles,
 * counted twice, summed over all markers.
 * NOTE(review): the line carrying the function name and the parameter `d` is
 * missing from this listing.
 *
 * Returns that sum, or 0 if the effect set or the group does not exist. A
 * marker at which no carried allele appears in the effect set contributes 0.
 */
9974 const gsc_GroupNum group,
9975 const gsc_EffectID effID) {
9976 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
9977 if (effIndex == GSC_NA_IDX) {
9978 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effID.id);
9979 return 0;
9980 }
9981 gsc_EffectMatrix e = d->e[effIndex];
9982
9983 // assumes no alleles in the matrix are spaces.
9984
9985 GSC_GLOBALX_T gsize = gsc_get_group_size(d, group);
9986 if (gsize == 0) {
9987 fprintf(stderr,"Nonexistent group %lu\n", (long unsigned int) group.num);
9988 return 0;
9989 }
9990 GSC_CREATE_BUFFER(ggenes,char*,gsize);
9991 gsc_get_group_genes(d, group, gsize, ggenes);
9992
9993 double total_score = 0;
9994 char best_allele;
9995 double best_score;
9996
9997 // for each locus
9998 for (GSC_GENOLEN_T j = 0; j < d->genome.n_markers; ++j) {
// '\0' doubles as "no allele chosen yet"; the score restarts at 0 per marker.
9999 best_allele = '\0';
10000 best_score = 0;
10001 for (GSC_GLOBALX_T i = 0; i < gsize; ++i) {
10002
10003 // If the allele is different to the previous best (guaranteed if best_allele is not initialised)
10004 if (ggenes[i][2*j] != best_allele) {
10005 // Find it and see if it scores better.
10006 for (int a = 0; a < e.effects.rows; ++a) {
10007
10008 if (e.effect_names[a] == ggenes[i][2*j] &&
10009 (best_allele == '\0' || e.effects.matrix[a][j] > best_score)) { // if it scores better than current best
10010
10011 best_allele = ggenes[i][2*j];
10012 best_score = e.effects.matrix[a][j];
10013
10014 break;
10015 }
10016
10017 }
10018 }
10019
10020 // Repeat for second allele of the group member at that locus
10021 if (ggenes[i][2*j + 1] != best_allele) {
10022 // Find it and see if it scores better.
10023 for (int a = 0; a < e.effects.rows; ++a) {
10024
10025 if (e.effect_names[a] == ggenes[i][2*j + 1] &&
10026 (best_allele == '\0' || e.effects.matrix[a][j] > best_score)) { // if it scores better than current best
10027
10028 best_allele = ggenes[i][2*j + 1];
10029 best_score = e.effects.matrix[a][j];
10030
10031 break;
10032 }
10033
10034 }
10035 }
10036 }
// Counted twice: once for each allele slot of the marker.
10037 total_score += (2*best_score);
10038 }
10039
10040 GSC_DELETE_BUFFER(ggenes);
10041 return total_score;
10042}
10043
/* Computes the smallest score attainable under the chosen effect set: for each
 * marker the minimum effect over all effect rows, counted twice (once per
 * allele slot), summed over all markers.
 * NOTE(review): the signature line (function name and the parameters `d` and
 * `effID`) is missing from this listing.
 *
 * Returns that sum, or 0 if the effect set does not exist.
 */
10056 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
10057 if (effIndex == GSC_NA_IDX) {
10058 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effID.id);
10059 return 0;
10060 }
10061 gsc_EffectMatrix e = d->e[effIndex];
10062
10063 double worst_gebv = 0;
10064 double worst_score;
10065
10066 for (GSC_GENOLEN_T i = 0; i < d->genome.n_markers; ++i) {
10067 // Find the allele with the lowest effect
10068 worst_score = e.effects.matrix[0][i];
10069 for (int a = 1; a < e.effects.rows; ++a) {
10070 if (e.effects.matrix[a][i] < worst_score) {
10071 worst_score = e.effects.matrix[a][i];
10072 }
10073 }
10074
10075 // add that lowest allele to the score twice over
10076 worst_gebv += (2*worst_score);
10077 }
10078
10079 return worst_gebv;
10080}
10081
10082/*--------------------------------Saving-----------------------------------*/
10083
/* Opens `fname` for writing (truncating it) and writes out the marker blocks `b`.
 * If the file cannot be opened, prints a message to stderr and returns.
 *
 * fname:      path of the output file.
 * d:          simulation data; used here to look up the map index.
 * b:          the marker blocks to save.
 * labelMapID: recombination map to use; the first branch below is taken when this
 *             is NO_MAP or when no map with this ID can be found.
 *
 * NOTE(review): no fclose(f) appears among the visible lines of this function -
 * confirm that the stream is closed on every path.
 */
10101void gsc_save_markerblocks(const char* fname,
10102 const gsc_SimData* d,
10103 const gsc_MarkerBlocks b,
10104 const gsc_MapID labelMapID) {
10105 FILE* f;
10106 if ((f = fopen(fname, "w")) == NULL) {
10107 fprintf(stderr, "Failed to open file %s for writing output\n", fname); return;
10108 }
10109
10110 GSC_ID_T mapix;
 // mapix is only assigned when labelMapID is not NO_MAP (short-circuit ||), so it
 // must only be read in the else branch.
10111 if (labelMapID.id == NO_MAP.id || (mapix = gsc_get_index_of_map(d, labelMapID)) == GSC_NA_IDX) {
10113 } else {
10115 }
10116}
10117
/* Opens `fname` for writing (truncating it) and writes a genotype matrix to it via
 * gsc_save_utility_genotypes, using the marker count and marker names stored in
 * d->genome. If the file cannot be opened, prints a message to stderr and returns.
 *
 * fname:           path of the output file.
 * d:               simulation data to read from.
 * groupID:         group selector; presumably restricts output to that group's
 *                  members - the line that consumes it is not visible here.
 * markers_as_rows: forwarded unchanged; selects the orientation of the matrix.
 */
10137void gsc_save_genotypes(const char* fname,
10138 const gsc_SimData* d,
10139 const gsc_GroupNum groupID,
10140 const _Bool markers_as_rows) {
10141 FILE* f;
10142 if ((f = fopen(fname, "w")) == NULL) {
10143 fprintf(stderr, "Failed to open file %s for writing output\n", fname); return;
10144 }
10145
10146 // casting away const, but the iterator is never used to change anything
10148
10149 gsc_save_utility_genotypes(f, &it, d->genome.n_markers, d->genome.marker_names, markers_as_rows);
10150
10152 fclose(f);
10153}
10154
/* Opens `fname` for writing (truncating it) and writes a matrix of counts of
 * `allele` to it. If the file cannot be opened, prints a message to stderr and
 * returns.
 *
 * fname:           path of the output file.
 * d:               simulation data to read from.
 * groupID:         group selector; presumably restricts output to that group's
 *                  members - the line that consumes it is not visible here.
 * allele:          the allele character to count; forwarded to the writer.
 * markers_as_rows: forwarded unchanged; selects the orientation of the matrix.
 */
10175void gsc_save_allele_counts(const char* fname,
10176 const gsc_SimData* d,
10177 const gsc_GroupNum groupID,
10178 const char allele,
10179 const _Bool markers_as_rows) {
10180 FILE* f;
10181 if ((f = fopen(fname, "w")) == NULL) {
10182 fprintf(stderr, "Failed to open file %s for writing output\n", fname); return;
10183 }
10184
10185 // casting away const, but the iterator is never used to change anything
10187
10189 markers_as_rows, allele);
10190
10192 fclose(f);
10193}
10194
/* Opens `fname` for writing (truncating it) and writes pedigree information to it
 * via gsc_save_utility_pedigrees, using d->m as the store in which parent names
 * are looked up. If the file cannot be opened, prints a message to stderr and
 * returns.
 *
 * fname:         path of the output file.
 * d:             simulation data to read from.
 * groupID:       group selector; presumably restricts output to that group's
 *                members - the line that consumes it is not visible here.
 * full_pedigree: forwarded unchanged; selects between the two output formats of
 *                gsc_save_utility_pedigrees.
 */
10216void gsc_save_pedigrees(const char* fname,
10217 const gsc_SimData* d,
10218 const gsc_GroupNum groupID,
10219 const _Bool full_pedigree) {
10220 FILE* f;
10221 if ((f = fopen(fname, "w")) == NULL) {
10222 fprintf(stderr, "Failed to open file %s for writing output\n", fname); return;
10223 }
10224
10225 // casting away const, but the iterator is never used to change anything
10227
10228 gsc_save_utility_pedigrees(f, &it, full_pedigree, d->m);
10229
10231 fclose(f);
10232}
10233
10249void gsc_save_bvs(const char* fname,
10250 const gsc_SimData* d,
10251 const gsc_GroupNum groupID,
10252 const gsc_EffectID effID) {
10253 FILE* f;
10254 if ((f = fopen(fname, "w")) == NULL) {
10255 fprintf(stderr, "Failed to open file %s for writing output\n", fname); return;
10256 }
10257
10258 GSC_ID_T effix = gsc_get_index_of_eff_set(d, effID);
10259 if (effix == GSC_NA_IDX) {
10260 fprintf(stderr, "Marker effect set %lu does not exist: cannot calculate breeding values\n", (long unsigned int) effID.id); return;
10261 }
10262
10263 // casing away const but I promise not to use the iterator to change anything
10265
10266 gsc_save_utility_bvs(f, &it, &d->e[effix]);
10267
10269 fclose(f);
10270}
10271
// Reports whether marker index `markerix` belongs to linkage group `chr`, and
// optionally writes the marker's position on it to *pos, computed as the stored
// distance multiplied by the group's expected number of crossovers.
// Returns GSC_TRUE if the marker is in the group, GSC_FALSE if it is not, and
// GSC_NA if chr.type matches neither case of the switch.
10275 const gsc_LinkageGroup chr,
10276 double* pos) {
10277 GSC_GENOLEN_T offset;
10278 switch (chr.type) {
10279 case GSC_LINKAGEGROUP_SIMPLE:
 // Simple groups cover a contiguous index range starting at first_marker_index.
 // NOTE(review): if GSC_GENOLEN_T is unsigned, `offset >= 0` is always true and a
 // markerix below first_marker_index wraps to a large value; the upper-bound
 // check then rejects it. Confirm the type.
10280 offset = markerix - chr.map.simple.first_marker_index;
10281 if (offset >= 0 && offset < chr.map.simple.n_markers) {
10282 if (pos != NULL && chr.map.simple.n_markers > 1) {
10283 *pos = chr.map.simple.dists[offset] * chr.map.simple.expected_n_crossovers;
10284 } else {
 // NOTE(review): this branch is also reached when pos == NULL, in which case the
 // store below dereferences a null pointer. The REORDER case guards against NULL.
10285 *pos = 0; // if there is only one marker on chromosome
10286 }
10287 return GSC_TRUE;
10288 } else {
10289 return GSC_FALSE;
10290 }
10291 case GSC_LINKAGEGROUP_REORDER:
 // Reordered groups list their marker indexes explicitly, so search linearly.
10292 for (GSC_GENOLEN_T i = 0; i < chr.map.reorder.n_markers; ++i) {
10293 if (markerix == chr.map.reorder.marker_indexes[i]) {
10294 if (pos != NULL) {
10295 *pos = chr.map.reorder.dists[i] * chr.map.reorder.expected_n_crossovers;
10296 }
10297 return GSC_TRUE;
10298 }
10299 }
10300 return GSC_FALSE;
10301 }
10302 return GSC_NA;
10303}
10304
// Writes one line per marker block to `f`.
// With a map: a header line "Chrom\tLen\tMarkers", then for each block the index of
// the chromosome that contains all of its markers and the block's length (or
// "-\t-" if no single chromosome contains them all), followed by the marker list.
// Without a map: only the ';'-terminated marker list is written, with no header.
10363 const gsc_MarkerBlocks b,
10364 const GSC_GENOLEN_T n_markers,
10365 char** const marker_names,
10366 const RecombinationMap* map) {
10367
10368 // Header only gets printed if there are multiple columns.
10369 // (If no map is provided, we print only the third column (markers in each block))
10370 if (map != NULL) {
10371 const char header[] = "Chrom\tLen\tMarkers\n";
10372 fwrite(header, sizeof(char)*strlen(header), 1, f);
10373 }
10374
10375 for (GSC_ID_T i = 0; i < b.num_blocks; ++i) {
10376 if (map != NULL) {
10377 // If we are provided a map, then try to find and print the length of each block
10378 int isonchr = -1;
10379 double len = 0;
10380 if (b.num_markers_in_block[i] > 0) {
10381 double minpos = 0;
10382 double maxpos = 0;
 // Search the chromosomes in order; once one is matched, the remaining markers
 // of the block are tested against that same chromosome only.
10383 for (GSC_GENOLEN_T chrix = 0; chrix < map->n_chr; ++chrix) {
10385 map->chrs[chrix],&minpos)) {
10386 isonchr = chrix;
10387 maxpos = minpos;
10388 for (GSC_GENOLEN_T j = 1; j < b.num_markers_in_block[i]; ++j) {
10389 double pos;
10391 map->chrs[chrix],&pos)) {
10392 maxpos = (pos > maxpos) ? pos : maxpos;
10393 minpos = (pos < minpos) ? pos : minpos;
10394 } else {
 // A marker of this block is not on the matched chromosome: report no chromosome.
10395 isonchr = -1;
10396 break;
10397 }
10398 }
10399 len = maxpos - minpos;
10400 break;
10401 }
10402 }
10403 }
10404
10405 if (isonchr >= 0) {
 // len is scaled by 100 on output; presumably Morgans to centimorgans - TODO confirm.
10406 fprintf(f,"%lu\t%lf\t",(long unsigned int)isonchr,len*100);
10407 } else {
10408 const char colns[] = "-\t-\t";
10409 fwrite(colns, sizeof(char)*strlen(colns), 1, f);
10410 }
10411 }
10412
10413 // Print the markers contained in the block
10414 for (GSC_GENOLEN_T j = 0; j < b.num_markers_in_block[i]; ++j) {
10415 GSC_GENOLEN_T k = b.markers_in_block[i][j];
 // NOTE(review): `k <= n_markers` admits k == n_markers, which indexes one past
 // the end if marker_names holds n_markers entries - looks like it should be `<`.
 // Also, marker_names[k] is passed to strlen without a NULL check here.
10416 if (k <= n_markers) {
10417 fwrite(marker_names[k], sizeof(char)*strlen(marker_names[k]), 1, f);
10418 } else {
10419 fprintf(f,"%lu",(long unsigned int)k);
10420 }
10421 fputc(';',f);
10422 }
10423
10424 fwrite("\n", sizeof(char), 1, f);
10425 }
10426
10427 fflush(f);
10428 return;
10429}
10430
// Generic writer for a tab-separated genotype-by-marker table. The contents of
// each body cell are produced by the `bodycell_printer` callback, which is called
// with the stream, a genotype location, a marker index and `bodycell_printer_data`.
// markers_as_rows selects the orientation: true  -> one row per marker, one column
// per genotype; false -> one row per genotype, one column per marker.
// Genotypes are labelled with their name, or with their numeric id if unnamed.
10438 gsc_BidirectionalIterator* targets,
10439 GSC_GENOLEN_T n_markers,
10440 char** const marker_names,
10441 const _Bool markers_as_rows,
10442 void (*bodycell_printer)(FILE*,
10445 void*),
10446 void* bodycell_printer_data) {
10447
10448 // legacy feature: if printing a specific group's members, put the group number in
10449 // the top left corner cell
10450 if (targets != NULL && targets->group.num != NO_GROUP.num) {
10451 fprintf(f,"%lu",(long unsigned int) targets->group.num);
10452 }
10453
10454 GSC_GLOBALX_T ntargets;
10455 if (markers_as_rows) {
10456 ntargets = 0;
10457 // Header row (genotype names)
 // This pass over the iterator also counts the genotypes into ntargets.
10458 if (targets != NULL) {
10460 while (IS_VALID_LOCATION(loc)) {
10461 fwrite("\t", sizeof(char), 1, f);
10462 ++ntargets;
10463 char* n = gsc_get_name(loc);
10464 if (n != NULL) {
10465 fwrite(n, sizeof(char)*strlen(n), 1, f);
10466 } else {
10467 fprintf(f, "%lu", (long unsigned int) gsc_get_id(loc).id);
10468 }
10469
10470 loc = gsc_next_forwards(targets);
10471 }
10472 fwrite("\n", sizeof(char), 1, f);
10473 }
10474
10475 // Body (genotypes and genotype names)
10476 // - This is our row counter
10477 GSC_GENOLEN_T row = 0;
10478 gsc_GenoLocation* genos = NULL;
10479 // - This is our genotype position cache, because BidirectionalIterator does not have a built-in cache
 // The cache is only built if there is at least one genotype and at least one row
 // to print. Allocation failure is tolerated (GSC_FALSE = do not exit): genos stays
 // NULL and each row re-walks the iterator instead.
10480 if (ntargets > 0 && ((row < n_markers || (ntargets > 0 && row < targets->cachedAM->n_markers)))) {
10481 genos = gsc_malloc_wrap(sizeof(*genos)*ntargets, GSC_FALSE);
10482 if (genos != NULL) {
10483 genos[0] = gsc_set_bidirectional_iter_to_start(targets);
10484 for (GSC_GLOBALX_T i = 1; i < ntargets; ++i) {
10485 genos[i] = gsc_next_forwards(targets);
10486 }
10487 }
10488 }
 // Rows continue until both the caller's marker count and (if there are genotypes)
 // the marker count of the iterator's cached allele matrix are exhausted.
10489 while (row < n_markers || (ntargets > 0 && row < targets->cachedAM->n_markers)) {
10490 // Row header
 // NOTE(review): marker_names itself is not checked against NULL in this branch,
 // although the markers-as-columns branch below does check it.
10491 if (row < n_markers) {
10492 if (marker_names[row] != NULL) {
10493 fwrite(marker_names[row], sizeof(char)*strlen(marker_names[row]), 1, f);
10494 }
10495 }
10496
10497 // Row body
10498 for (GSC_GLOBALX_T i = 0; i < ntargets; ++i) {
10499 gsc_GenoLocation loc;
10500 if (genos != NULL) {
10501 loc = genos[i];
10502 } else {
 // No cache: restart the iterator at the first column and step forwards after that.
10503 loc = (i == 0) ? gsc_set_bidirectional_iter_to_start(targets) :
10504 gsc_next_forwards(targets);
10505 }
10506
10507 fwrite("\t", sizeof(char), 1, f);
10508 bodycell_printer(f,loc,row,bodycell_printer_data);
10509 }
10510
10511 fwrite("\n", sizeof(char), 1, f);
10512 ++row;
10513 }
10514 if (genos != NULL) { GSC_FREE(genos); }
10515
10516 } else { // markers as rows = false
10517 // Header row (marker names)
10518 if (marker_names != NULL) {
10519 for (GSC_GENOLEN_T i = 0; i < n_markers; ++i) {
10520 fwrite("\t", sizeof(char), 1, f);
10521 if (marker_names[i] != NULL) {
10522 fwrite(marker_names[i], sizeof(char)*strlen(marker_names[i]), 1, f);
10523 }
10524 }
10525 fwrite("\n", sizeof(char), 1, f);
10526 }
10527
10528 // Body (genotypes and genotype names)
10529 if (targets != NULL) {
10531 while (IS_VALID_LOCATION(loc)) {
10532 // Row header
10533 char* n = gsc_get_name(loc);
10534 if (n != NULL) {
10535 fwrite(n, sizeof(char)*strlen(n), 1, f);
10536 } else {
10537 fprintf(f, "%lu", (long unsigned int) gsc_get_id(loc).id);
10538 }
10539
10540 // Row body
 // The column count comes from the iterator's cached allele matrix, not from the
 // n_markers parameter that sized the header row.
10541 for (GSC_GENOLEN_T i = 0; i < targets->cachedAM->n_markers; ++i) {
10542 fwrite("\t", sizeof(char), 1, f);
10543 bodycell_printer(f,loc,i,bodycell_printer_data);
10544 }
10545 fwrite("\n", sizeof(char), 1, f);
10546
10547 loc = gsc_next_forwards(targets);
10548 }
10549 }
10550 }
10551
10552 fflush(f);
10553 return;
10554}
10555
// Cell printer: writes the two allele characters that genotype `loc` carries at
// marker `markerix`. Writes nothing if `loc` is not a valid location.
// The final parameter (`NA`) is not used.
10559 gsc_GenoLocation loc,
10560 GSC_GENOLEN_T markerix,
10561 void* NA) {
10562 if (IS_VALID_LOCATION(loc)) {
 // Alleles are stored two characters per marker, so marker k starts at offset 2*k.
10563 fwrite(gsc_get_alleles(loc) + 2*markerix, sizeof(char)*2, 1, f);
10564 }
10565}
10566
// Cell printer: writes a single digit '0', '1' or '2' giving how many of the two
// alleles that genotype `loc` carries at marker `markerix` equal the target allele.
// `data` must point to a char holding the target allele.
// Writes nothing if `loc` is not a valid location.
10570 gsc_GenoLocation loc,
10571 GSC_GENOLEN_T markerix,
10572 void* data) {
10573 if (IS_VALID_LOCATION(loc)) {
10574 char allele = *(char*) data;
10575 int count = 0;
 // NOTE(review): uses the un-prefixed name get_alleles, whereas the genotype cell
 // printer uses gsc_get_alleles - presumably an alias; confirm it is always defined.
10576 if (get_alleles(loc)[2*markerix] == allele) { ++count; }
10577 if (get_alleles(loc)[2*markerix + 1] == allele) { ++count; }
 // count is at most 2, so '0' + count is always a single digit character.
10578 char out = '0' + count;
10579 fwrite(&out, sizeof(char), 1, f);
10580 }
10581}
10582
// Thin wrapper: forwards its stream, iterator, marker count, marker names and
// orientation flag unchanged to gsc_scaffold_save_genotype_info. The cell-printer
// arguments of that call are not visible in this listing.
10631 gsc_BidirectionalIterator* targets,
10632 GSC_GENOLEN_T n_markers,
10633 char** const marker_names,
10634 const _Bool markers_as_rows) {
10635 gsc_scaffold_save_genotype_info(f, targets, n_markers, marker_names, markers_as_rows,
10637}
10638
// Thin wrapper: calls gsc_scaffold_save_genotype_info with
// gsc_helper_output_countmatrix_cell as the cell printer, so that each body cell
// holds the count of `allele` for that genotype and marker.
10689 GSC_GENOLEN_T n_markers,
10690 char** const marker_names,
10691 const _Bool markers_as_rows,
10692 const char allele) {
 // The address of the by-value parameter is passed as printer data; this is safe
 // only because the callee uses it before this function returns.
10693 gsc_scaffold_save_genotype_info(f, targets, n_markers, marker_names, markers_as_rows,
10694 &gsc_helper_output_countmatrix_cell, (void*)&allele);
10695}
10696
// Recursively emits the ancestry of a genotype with parents p1 and p2, in the form
//   =(parent1[ancestry],parent2[ancestry])   when the two parents differ, or
//   =(parent[ancestry])                      when they are the same / only one is known.
// Each parent is emitted by name if it has one, otherwise by numeric id.
// All output goes through the two callbacks: strprinter(chars, n_bytes, printer_data)
// for text and intprinter(value, printer_data) for ids.
10704 gsc_PedigreeID p1,
10705 gsc_PedigreeID p2,
10706 void (*strprinter)(char*, size_t, void*),
10707 void (*intprinter)(long unsigned int, void*),
10708 void* printer_data) {
10709 gsc_PedigreeID pedigree[2];
10710
10711 // open brackets
10712 strprinter("=(", sizeof(char)*2,printer_data);
10713 char* name;
10714
10715 // enables us to print only the known parent if one is unknown
 // Taking the larger id selects the known parent - this presumes GSC_NO_PEDIGREE.id
 // is the smallest possible id value; TODO confirm.
10716 if (p1.id == GSC_NO_PEDIGREE.id || p2.id == GSC_NO_PEDIGREE.id) {
10717 p1.id = (p1.id >= p2.id) ? p1.id : p2.id; //max of the two
10718 p2.id = p1.id;
10719 }
10720
10721 if (p1.id == p2.id) {
10722 if (p1.id != GSC_NO_PEDIGREE.id) { //print nothing if both are unknown.
10723 // Selfed parent
10724 name = gsc_get_name_of_id( m, p1);
10725 if (name != NULL) {
10726 strprinter(name, sizeof(char)*strlen(name), printer_data);
10727 } else if (p1.id != GSC_NO_PEDIGREE.id) {
10728 intprinter((long unsigned int) p1.id,printer_data);
10729 }
10730
 // Recurse only when the parent's own parents could be looked up (return value 0).
10731 if (gsc_get_parents_of_id(m, p1, pedigree) == 0) {
10732 gsc_scaffold_save_ancestry_of(m, pedigree[0], pedigree[1],strprinter,intprinter,printer_data);
10733 }
10734 }
10735 } else {
10736 // Parent 1
10737 name = gsc_get_name_of_id( m, p1);
10738 if (name != NULL) {
10739 strprinter(name, sizeof(char)*strlen(name),printer_data);
10740 } else if (p1.id != GSC_NO_PEDIGREE.id) {
10741 intprinter((long unsigned int) p1.id,printer_data);
10742 }
10743 if (gsc_get_parents_of_id(m, p1, pedigree) == 0) {
10744 gsc_scaffold_save_ancestry_of(m, pedigree[0], pedigree[1],strprinter,intprinter,printer_data);
10745 }
10746
10747 // separator
10748 strprinter(",", sizeof(char),printer_data);
10749
10750 // Parent 2
10751 name = gsc_get_name_of_id( m, p2);
10752 if (name != NULL) {
10753 strprinter(name, sizeof(char)*strlen(name),printer_data);
10754 } else if (p2.id != GSC_NO_PEDIGREE.id) {
10755 intprinter((long unsigned int) p2.id,printer_data);
10756 }
10757
10758 if (gsc_get_parents_of_id(m, p2, pedigree) == 0) {
10759 gsc_scaffold_save_ancestry_of(m, pedigree[0], pedigree[1],strprinter,intprinter,printer_data);
10760 }
10761
10762 }
10763
10764 // close brackets
10765 strprinter(")", sizeof(char),printer_data);
10766}
10767
/**
 * Ancestry-printer callback that writes a run of characters to a file stream.
 *
 * Intended to be passed as the `strprinter` argument of
 * gsc_scaffold_save_ancestry_of() when the output destination is a file.
 *
 * @param str  characters to write. Exactly `len` bytes are written, so the
 *             buffer does not need to be null-terminated.
 * @param len  number of bytes of `str` to write. (Named `len` rather than
 *             `strlen` so it does not shadow the standard library function.)
 * @param data the destination stream, passed as an opaque pointer; it must be
 *             a FILE* opened for writing. A NULL stream is ignored.
 */
static void gsc_helper_ancestry_strprinter_file(char* str, size_t len, void* data) {
    FILE* f = (FILE*) data;
    // Nothing to do for an absent stream or an empty run of characters.
    if (f == NULL || str == NULL || len == 0) { return; }
    fwrite(str, len, 1, f);
}
10774
/**
 * Ancestry-printer callback that writes an unsigned integer, in decimal, to a
 * file stream.
 *
 * Intended to be passed as the `intprinter` argument of
 * gsc_scaffold_save_ancestry_of() when the output destination is a file.
 *
 * @param i    the value to write (formatted with "%lu").
 * @param data the destination stream, passed as an opaque pointer; it must be
 *             a FILE* opened for writing. A NULL stream is ignored.
 */
static void gsc_helper_ancestry_intprinter_file(long unsigned int i, void* data) {
    FILE* f = (FILE*) data;
    if (f == NULL) { return; }
    fprintf(f, "%lu", i);
}
10781
// Writes one line of pedigree information per genotype reached through `targets`.
// full_pedigree == 0: "<name or id>\t<parent 1>\t<parent 2>", where each parent is
//                     written by name if found in parent_pedigree_store, otherwise
//                     by id, and left empty if unknown.
// full_pedigree == 1: "<id>\t<name>" followed by the recursively expanded ancestry
//                     produced by gsc_scaffold_save_ancestry_of.
// Does nothing if `targets` is NULL.
10843 const _Bool full_pedigree,
10844 const AlleleMatrix* parent_pedigree_store) {
10845
10846 if (targets == NULL) { return; }
10847
10848 gsc_GenoLocation loc;
10849 switch (full_pedigree) {
10850 case 0:
10852 while (IS_VALID_LOCATION(loc)) {
10853 // Offspring
10854 char* n = gsc_get_name(loc);
10855 if (n != NULL) {
10856 fwrite(n, sizeof(char)*strlen(n), 1, f);
10857 } else {
10858 fprintf(f, "%lu", (long unsigned int) gsc_get_id(loc).id);
10859 }
10860
10861 // Parents
10862 for (int parent = 0; parent < 2; ++parent) {
10863 fwrite("\t", sizeof(char), 1, f);
10864 n = NULL;
10865 gsc_PedigreeID p = (parent == 0) ? gsc_get_first_parent(loc) : gsc_get_second_parent(loc);
 // Names can only be looked up when a parent store was supplied.
10866 if (p.id != GSC_NO_PEDIGREE.id && parent_pedigree_store != NULL) {
10867 n = gsc_get_name_of_id(parent_pedigree_store, p);
10868 }
10869 if (n != NULL) {
10870 fwrite(n, sizeof(char)*strlen(n), 1, f);
 // NOTE(review): un-prefixed NO_PEDIGREE here, GSC_NO_PEDIGREE everywhere else in
 // this function - presumably an alias; confirm it is always defined.
10871 } else if (p.id != NO_PEDIGREE.id) {
10872 fprintf(f, "%lu", (long unsigned int) p.id);
10873 }
10874 }
10875
10876 fwrite("\n", sizeof(char), 1, f);
10877 loc = gsc_next_forwards(targets);
10878 }
10879
10880 break;
10881 case 1:
10883 while (IS_VALID_LOCATION(loc)) {
10884 // Offspring
 // In this format the id is always written, and the name column may be empty.
10885 fprintf(f, "%lu\t", (long unsigned int) gsc_get_id(loc).id);
10886 char* n = gsc_get_name(loc);
10887 if (n != NULL) {
10888 fwrite(n, sizeof(char)*strlen(n), 1, f);
10889 }
10890
10891 // Parents (recursively)
10892 if ((gsc_get_first_parent(loc).id != GSC_NO_PEDIGREE.id ||
10894 && parent_pedigree_store != NULL) {
10895 gsc_scaffold_save_ancestry_of(parent_pedigree_store,
10898 }
10899
10900 fwrite("\n", sizeof(char), 1, f);
10901 loc = gsc_next_forwards(targets);
10902 }
10903
10904 break;
10905 }
10906
10907 fflush(f);
10908 return;
10909}
10910
// Calculates breeding values for the genotypes reached through `targets` using the
// effect set `eff`, and writes one line per value: "<id>\t<name>\t<breeding value>".
// The name column is left empty for unnamed genotypes.
// Does nothing if `targets` or `eff` is NULL.
10932 gsc_BidirectionalIterator* targets,
10933 const gsc_EffectMatrix* eff) {
10934 if (targets == NULL || eff == NULL) { return; }
10935
10936 gsc_DecimalMatrix bvs = gsc_calculate_utility_bvs(targets, eff);
10938
 // One output line per column of the result; the values are read from row 0.
10939 for (size_t i = 0; i < bvs.cols; ++i) {
10940 if (IS_VALID_LOCATION(loc)) {
10941 fprintf(f, "%lu", (long unsigned int) gsc_get_id(loc).id);
10942 fwrite("\t", sizeof(char), 1, f);
10943 char* n = gsc_get_name(loc);
10944 if (n != NULL) {
10945 fwrite(n, sizeof(char), strlen(n), f);
10946 }
10947 fwrite("\t", sizeof(char), 1, f);
10948 } else {
 // More values than valid locations: keep the column layout with empty id and name.
10949 fwrite("\t\t", sizeof(char)*2, 1, f);
10950 }
10951
10952 fprintf(f, "%lf", bvs.matrix[0][i]);
10953 fwrite("\n", sizeof(char), 1, f);
10954
10955 loc = gsc_next_forwards(targets);
10956 }
10957
10958 gsc_delete_dmatrix(&bvs);
10959 fflush(f);
10960 return;
10961}
10962
10963
10964#endif
char * gsc_calculate_optimal_possible_haplotype(const gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID)
Calculates the highest-breeding-value haplotype that can be created from the alleles present in a giv...
double gsc_calculate_optimal_possible_bv(const gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID)
Calculates the breeding value of the highest breeding-value genotype that can be created from the all...
gsc_DecimalMatrix gsc_calculate_allele_counts(const gsc_SimData *d, const gsc_GroupNum group, const char allele)
Calculates the number of times at each marker that a particular allele appears.
gsc_DecimalMatrix gsc_calculate_bvs(const gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID)
Calculate the fitness metric/breeding value for every genotype in the simulation or every genotype in...
void gsc_calculate_utility_allele_counts_pair(const unsigned int n_markers, const unsigned int n_genotypes, const char **const genotypes, const char allele, gsc_DecimalMatrix *counts, const char allele2, gsc_DecimalMatrix *counts2)
Calculates the number of times at each marker that two particular alleles appear.
gsc_MarkerBlocks gsc_create_evenlength_blocks_each_chr(const gsc_SimData *d, const gsc_MapID mapid, const unsigned int n)
Divide the genotype into blocks where each block contains all markers within a 1/n length section of ...
gsc_DecimalMatrix gsc_calculate_utility_bvs(gsc_BidirectionalIterator *targets, const gsc_EffectMatrix *effset)
Calculate the fitness metric/breeding value for a set of genotypes.
gsc_GroupNum gsc_split_by_bv(gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID, const unsigned int top_n, const _Bool lowIsBest)
Takes the top_n individuals in the group with the best breeding values/fitnesses and puts them in a n...
void gsc_calculate_group_local_bvs(const gsc_SimData *d, const gsc_MarkerBlocks b, const gsc_EffectID effID, const char *output_file, const gsc_GroupNum group)
Given a set of blocks of markers in a file, for each genotype in a group, calculate the local fitness...
void gsc_calculate_utility_allele_counts(const unsigned int n_markers, const unsigned int n_genotypes, const char **const genotypes, const char allele, gsc_DecimalMatrix *counts)
Calculates the number of times at each marker that a particular allele appears.
void gsc_calculate_local_bvs(const gsc_SimData *d, const gsc_MarkerBlocks b, const gsc_EffectID effID, const char *output_file)
Given a set of blocks of markers in a file, for each genotype saved, calculate the local BV for the f...
double gsc_calculate_minimal_bv(const gsc_SimData *d, const gsc_EffectID effID)
Takes a look at the currently-loaded effect values and returns the lowest possible breeding value any...
char * gsc_calculate_optimal_haplotype(const gsc_SimData *d, const gsc_EffectID effID)
Takes a look at the currently-loaded effect values and creates a string containing the allele with th...
gsc_MarkerBlocks gsc_load_blocks(const gsc_SimData *d, const char *block_file)
Given a file containing definitions of blocks of markers, process that file and return a struct conta...
double gsc_calculate_optimal_bv(const gsc_SimData *d, const gsc_EffectID effID)
Takes a look at the currently-loaded effect values and returns the highest possible breeding value an...
unsigned int gsc_get_group_genes(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, char **output)
Gets a shallow copy of the genes/alleles of each member of the group.
unsigned int gsc_get_group_parent_ids(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, const int whichParent, gsc_PedigreeID *output)
Gets the ids of either the first or second parent of each member of the group.
unsigned int gsc_get_group_parent_names(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, const int whichParent, char **output)
Gets the names of either the first or second parent of each member of the group.
unsigned int gsc_get_group_pedigrees(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, char **output)
Gets the full pedigree string (as per gsc_save_group_full_pedigree() ) of each member of the group.
unsigned int gsc_get_group_names(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, char **output)
Gets a shallow copy of the names of each member of the group.
unsigned int gsc_get_group_indexes(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, unsigned int *output)
Gets the 0-based global indexes of each member of the group.
unsigned int gsc_get_group_ids(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, gsc_PedigreeID *output)
Gets the ids of each member of the group.
unsigned int gsc_get_group_bvs(const gsc_SimData *d, const gsc_GroupNum group_id, const gsc_EffectID effID, unsigned int group_size, double *output)
Gets the breeding values/fitnesses of each member of the group.
size_t gsc_get_existing_group_counts(gsc_SimData *d, gsc_GroupNum *out_groups, unsigned int *out_sizes)
Identify group numbers that currently have members, and how many members they have.
size_t gsc_get_existing_groups(gsc_SimData *d, gsc_GroupNum *output)
Identify group numbers that currently have members.
unsigned int gsc_get_group_size(const gsc_SimData *d, const gsc_GroupNum group_id)
Function to count the number of genotypes that currently belong to the specified group.
gsc_GroupNum gsc_make_double_crosses_from_file(gsc_SimData *d, const char *input_file, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Perform crosses between previously-generated offspring of pairs of parents identified by name in a fi...
gsc_GroupNum gsc_make_random_crosses(gsc_SimData *d, const gsc_GroupNum from_group, const unsigned int n_crosses, const unsigned int cap, const gsc_MapID which_map, const gsc_GenOptions g)
Performs random crosses among members of a group.
gsc_GroupNum gsc_make_crosses_from_file(gsc_SimData *d, const char *input_file, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Perform crosses between pairs of parents identified by name in a file and allocate the resulting offs...
gsc_GroupNum gsc_make_doubled_haploids(gsc_SimData *d, const gsc_GroupNum group, const gsc_MapID which_map, const gsc_GenOptions g)
Creates a doubled haploid from each member of a group.
gsc_GroupNum gsc_make_clones(gsc_SimData *d, const gsc_GroupNum group, const _Bool inherit_names, gsc_GenOptions g)
Creates an identical copy of each member of a group.
gsc_GroupNum gsc_make_random_crosses_between(gsc_SimData *d, const gsc_GroupNum group1, const gsc_GroupNum group2, const unsigned int n_crosses, const unsigned int cap1, const unsigned int cap2, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Performs random crosses where the first parent comes from one group and the second from another.
gsc_GroupNum gsc_scaffold_make_new_genotypes(gsc_SimData *d, const gsc_GenOptions g, void *parentIterator, union gsc_datastore_make_genotypes *datastore, int(*parentChooser)(void *, union gsc_datastore_make_genotypes *, unsigned int *, gsc_ParentChoice[static 2]), void(*offspringGenerator)(gsc_SimData *, union gsc_datastore_make_genotypes *, gsc_ParentChoice[static 2], gsc_GenoLocation))
Make new genotypes (generic function)
gsc_GroupNum gsc_make_all_unidirectional_crosses(gsc_SimData *d, const gsc_GroupNum from_group, const gsc_MapID mapID, const gsc_GenOptions g)
Perform crosses between all pairs of parents in the group from_group and allocates the resulting offs...
gsc_GroupNum gsc_self_n_times(gsc_SimData *d, const unsigned int n, const gsc_GroupNum group, const gsc_MapID which_map, const gsc_GenOptions g)
Selfs each member of a group for a certain number of generations.
gsc_GroupNum gsc_make_targeted_crosses(gsc_SimData *d, const size_t n_combinations, const unsigned int *firstParents, const unsigned int *secondParents, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Performs the crosses of pairs of parents whose indexes are provided in an array.
void gsc_delete_label(gsc_SimData *d, const gsc_LabelID which_label)
Clears memory of this label from the simulation and all its genotypes.
void gsc_delete_simdata(gsc_SimData *m)
Deletes a gsc_SimData object and frees its memory.
void gsc_delete_eff_set(gsc_SimData *d, gsc_EffectID effID)
Deletes a particular set of marker effects from memory.
void gsc_delete_randomaccess_iter(gsc_RandomAccessIterator *it)
Deletes a gsc_RandomAccessIterator object and frees its memory.
void gsc_delete_effect_matrix(gsc_EffectMatrix *m)
Deletes a gsc_EffectMatrix object and frees its memory.
void gsc_delete_recombination_map(gsc_SimData *d, const gsc_MapID which_map)
Deletes a particular recombination map from memory.
void gsc_delete_bidirectional_iter(gsc_BidirectionalIterator *it)
Deletes a gsc_BidirectionalIterator object.
void gsc_delete_markerblocks(gsc_MarkerBlocks *b)
Delete a gsc_MarkerBlocks struct.
void gsc_delete_dmatrix(gsc_DecimalMatrix *m)
Deletes a gsc_DecimalMatrix and frees its memory.
void gsc_delete_allele_matrix(gsc_AlleleMatrix *m)
Delete the gsc_AlleleMatrix linked list from m onwards and frees its memory.
void gsc_delete_recombination_map_nointegrity(gsc_RecombinationMap *m)
Deletes and clears the memory of a gsc_RecombinationMap struct.
void gsc_delete_group(gsc_SimData *d, const gsc_GroupNum group_id)
Deletes all genotypes belonging to a particular group.
void gsc_move_genotype(gsc_GenoLocation from, gsc_GenoLocation to, int *label_defaults)
Move all details of the genotype at one gsc_GenoLocation to another gsc_GenoLocation.
void gsc_delete_genome(gsc_KnownGenome *g)
Deletes and clears the memory of a gsc_KnownGenome object and its children.
size_t gsc_split_into_buckets(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, const unsigned int *counts, gsc_GroupNum *results)
Split a group into n groups of equal size (or size differing only by one, if n does not perfectly div...
size_t gsc_split_by_probabilities(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, const double *probs, gsc_GroupNum *results)
Allocate each member of the group to one of n groups with custom probabilities for each group.
size_t gsc_scaffold_split_by_somequality(gsc_SimData *d, const gsc_GroupNum group_id, void *somequality_data, gsc_GroupNum(*somequality_tester)(gsc_GenoLocation, void *, size_t, size_t, gsc_GroupNum *), size_t maxentries_results, gsc_GroupNum *results)
Split by some quality (generic function)
gsc_GroupNum gsc_split_evenly_into_two(gsc_SimData *d, const gsc_GroupNum group_id)
Split a group into two groups of equal size (or size differing only by one, if the original group had...
gsc_GroupNum gsc_split_by_label_range(gsc_SimData *d, const gsc_GroupNum group, const gsc_LabelID whichLabel, const int valueLowBound, const int valueHighBound)
Allocates the genotypes with values of a label in a particular range to a new group.
size_t gsc_split_into_individuals(gsc_SimData *d, const gsc_GroupNum group_id, size_t maxentries_results, gsc_GroupNum *results)
Split a group into n one-member groups.
size_t gsc_split_into_halfsib_families(gsc_SimData *d, const gsc_GroupNum group_id, const int parent, size_t maxentries_results, gsc_GroupNum *results)
Split a group into families of half-siblings by shared first or second parent.
size_t gsc_split_evenly_into_n(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, gsc_GroupNum *results)
Split a group into n groups of equal size (or size differing only by one, if n does not perfectly div...
size_t gsc_scaffold_split_by_someallocation(gsc_SimData *d, const gsc_GroupNum group_id, void *someallocator_data, gsc_GroupNum(*someallocator)(gsc_GenoLocation, gsc_SimData *, void *, size_t, size_t *, gsc_GroupNum *), size_t n_outgroups, gsc_GroupNum *outgroups)
Split by some allocator (generic function)
gsc_GroupNum gsc_combine_groups(gsc_SimData *d, const size_t list_len, const gsc_GroupNum *grouplist)
Combine a set of groups into one group.
gsc_GroupNum gsc_split_by_label_value(gsc_SimData *d, const gsc_GroupNum group, const gsc_LabelID whichLabel, const int valueToSplit)
Allocates the genotypes with a particular value of a label to a new group.
gsc_GroupNum gsc_split_randomly_into_two(gsc_SimData *d, const gsc_GroupNum group_id)
Flip a coin for each member of the group to decide if it should be moved to the new group.
gsc_GroupNum gsc_make_group_from(gsc_SimData *d, const size_t index_list_len, const unsigned int *genotype_indexes)
Take a list of indexes and allocate the genotypes at those indexes to a new group.
size_t gsc_split_randomly_into_n(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, gsc_GroupNum *results)
Allocate each member of the group to one of n groups with equal probability.
size_t gsc_split_into_families(gsc_SimData *d, const gsc_GroupNum group_id, size_t maxentries_results, gsc_GroupNum *results)
Split a group into families by their pedigrees.
gsc_BidirectionalIterator gsc_create_bidirectional_iter(gsc_SimData *d, const gsc_GroupNum group)
Create a bidirectional iterator.
gsc_AlleleMatrix * gsc_get_nth_AlleleMatrix(gsc_AlleleMatrix *listStart, const unsigned int n)
Get a gsc_AlleleMatrix by index in the linked list.
#define GSC_INVALID_GENO_LOCATION
Constant representing a nonexistent location in the simulation.
gsc_BidirectionalIterator gsc_create_bidirectional_iter_fromAM(gsc_AlleleMatrix *am, const gsc_GroupNum group)
gsc_GenoLocation gsc_next_forwards(gsc_BidirectionalIterator *it)
Get the next location from a bidirectional iterator.
gsc_GenoLocation gsc_set_bidirectional_iter_to_end(gsc_BidirectionalIterator *it)
Initialise a Bidirectional iterator to the end of its sequence.
gsc_RandomAccessIterator gsc_create_randomaccess_iter(gsc_SimData *d, const gsc_GroupNum group)
Create a Random Access Iterator.
gsc_GenoLocation gsc_set_bidirectional_iter_to_start(gsc_BidirectionalIterator *it)
Initialise a Bidirectional iterator to the start of its sequence.
gsc_GenoLocation gsc_next_backwards(gsc_BidirectionalIterator *it)
Get the previous location from a bidirectional iterator.
gsc_GenoLocation gsc_next_get_nth(gsc_RandomAccessIterator *it, const unsigned int n)
Get a location by index using a gsc_RandomAccessIterator.
#define GSC_IS_VALID_LOCATION(g)
Check if a GenoLocation is valid (that is, it is not INVALID_GENO_LOCATION).
static gsc_PedigreeID gsc_get_id(const gsc_GenoLocation loc)
Get the persistent id of a genotype.
static char * gsc_get_name(const gsc_GenoLocation loc)
Get the name of a genotype.
static int gsc_get_label_value(const gsc_GenoLocation loc, const int labelIndex)
Get the value of a specific label of a genotype.
static char * gsc_get_alleles(const gsc_GenoLocation loc)
Get the alleles of a genotype.
static void gsc_set_group(const gsc_GenoLocation loc, const gsc_GroupNum group)
Set the current group membership of a genotype.
static gsc_PedigreeID gsc_get_first_parent(const gsc_GenoLocation loc)
Get the first/left parent of a genotype.
static gsc_PedigreeID gsc_get_second_parent(const gsc_GenoLocation loc)
Get the second/right parent of a genotype.
static void gsc_set_name(const gsc_GenoLocation loc, char *name)
Set the name of a genotype.
static gsc_GroupNum gsc_get_group(const gsc_GenoLocation loc)
Get the current group membership of a genotype.
gsc_AlleleMatrix * gsc_create_empty_allelematrix(const unsigned int n_markers, const unsigned int n_labels, const int *labelDefaults, const unsigned int n_genotypes)
Creator for an empty gsc_AlleleMatrix object of a given size.
gsc_EffectID gsc_load_effectfile(gsc_SimData *d, const char *filename)
Populates a gsc_SimData object with effect values.
struct gsc_MultiIDSet gsc_load_data_files(gsc_SimData *d, const char *genotype_file, const char *map_file, const char *effect_file, const gsc_FileFormatSpec format)
Populates a gsc_SimData object with marker allele data, a genetic map, and (optionally) marker effect...
gsc_GroupNum gsc_load_genotypefile(gsc_SimData *d, const char *filename, const gsc_FileFormatSpec format)
Load a set of genotypes to a gsc_SimData object.
gsc_MapID gsc_create_uniformspaced_recombmap(gsc_SimData *d, unsigned int n_markers, char **markernames, double expected_n_recombinations)
Create a uniformly-spaced gsc_RecombinationMap from a list of marker names and save to SimData.
void gsc_clear_simdata(gsc_SimData *d)
Clear a gsc_SimData object on the heap.
gsc_SimData * gsc_create_empty_simdata(unsigned int RNGseed)
Creator for an empty gsc_SimData object on the heap.
gsc_MapID gsc_load_mapfile(gsc_SimData *d, const char *filename)
Load a genetic map to a gsc_SimData object.
gsc_MapID gsc_create_recombmap_from_markerlist(gsc_SimData *d, unsigned int n_markers, struct gsc_MapfileUnit *markerlist)
Parse a list of markers/chrs/positions into a gsc_RecombinationMap and save to SimData.
int gsc_add_matrixvector_product_to_dmatrix(gsc_DecimalMatrix *result, const gsc_DecimalMatrix *a, const double *b)
Multiply a gsc_DecimalMatrix by a vector, and add that product to the first column of a provided gsc_...
int gsc_randpoi(rnd_pcg_t *rng, double lambda)
Randomly generates a number from the Poisson distribution with parameter lambda, using the Knuth appr...
gsc_DecimalMatrix gsc_generate_zero_dmatrix(const size_t r, const size_t c)
Generates a matrix of r rows and c columns with all entries set to 0.
int gsc_add_doublematrixvector_product_to_dmatrix(gsc_DecimalMatrix *result, const gsc_DecimalMatrix *amat, const double *avec, const gsc_DecimalMatrix *bmat, const double *bvec)
Multiply two sets of a gsc_DecimalMatrix and vector, and add both products to the first column of a p...
void gsc_generate_clone(gsc_SimData *d, const char *parent_genome, char *output)
Get an identical copy of a given genotype.
void gsc_generate_doubled_haploid(gsc_SimData *d, const char *parent_genome, char *output, const unsigned int map_index)
Get the alleles of the outcome of producing a doubled haploid from a gamete from a given parent.
void gsc_generate_gamete(gsc_SimData *d, const char *parent_genome, char *output, const unsigned int map_index)
Fills a char* with the simulated result of meiosis (reduction and recombination) from the marker alle...
static int gsc_has_same_alleles_window(const char *g1, const char *g2, const size_t start, const size_t w)
Simple operator to determine if at markers with indexes i to i+w inclusive, two genotypes share at le...
int gsc_calculate_recombinations_from_file(gsc_SimData *d, const char *input_file, const char *output_file, int window_len, int certain)
Provides guesses as to the location of recombination events that led to the creation of certain genot...
int * gsc_calculate_min_recombinations_fw1(gsc_SimData *d, gsc_MapID mapid, char *parent1, unsigned int p1num, char *parent2, unsigned int p2num, char *offspring, int certain)
Identify markers in the genotype of offspring where recombination from its parents occurred.
static int gsc_has_same_alleles(const char *p1, const char *p2, const size_t i)
Simple operator to determine if at marker i, two genotypes share at least one allele.
int * gsc_calculate_min_recombinations_fwn(gsc_SimData *d, gsc_MapID mapid, char *parent1, unsigned int p1num, char *parent2, unsigned int p2num, char *offspring, int window_size, int certain)
Identify markers in the genotype of offspring where recombination from its parents occurred,...
void gsc_save_utility_markerblocks(FILE *f, const gsc_MarkerBlocks b, const unsigned int n_markers, char **const marker_names, const gsc_RecombinationMap *map)
Prints the markers contained in a set of blocks to a file.
void gsc_save_utility_pedigrees(FILE *f, gsc_BidirectionalIterator *targets, const _Bool full_pedigree, const gsc_AlleleMatrix *parent_pedigree_store)
Prints pedigrees to a file.
void gsc_save_markerblocks(const char *fname, const gsc_SimData *d, const gsc_MarkerBlocks b, const gsc_MapID labelMapID)
Prints the markers contained in a set of blocks to a file.
void gsc_save_utility_allele_counts(FILE *f, gsc_BidirectionalIterator *targets, unsigned int n_markers, char **const marker_names, const _Bool markers_as_rows, const char allele)
Prints allele counts of simulated genotypes to a file.
void gsc_save_bvs(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const gsc_EffectID effID)
Prints breeding values of genotypes in the simulation to a file.
void gsc_save_utility_bvs(FILE *f, gsc_BidirectionalIterator *targets, const gsc_EffectMatrix *eff)
Calculate and print breeding values to a file.
void gsc_save_allele_counts(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const char allele, const _Bool markers_as_rows)
Prints allele counts of genotypes from the simulation to a file.
void gsc_save_genotypes(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const _Bool markers_as_rows)
Prints genotypes from the simulation to a file.
void gsc_save_utility_genotypes(FILE *f, gsc_BidirectionalIterator *targets, unsigned int n_markers, char **const marker_names, const _Bool markers_as_rows)
Prints simulated genotypes to a file.
void gsc_save_pedigrees(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const _Bool full_pedigree)
Prints pedigrees of genotypes in the simulation to a file.
char * gsc_get_genes_of_index(const gsc_AlleleMatrix *start, const unsigned int index)
Get the alleles of a genotype by its index.
char * gsc_get_name_of_id(const gsc_AlleleMatrix *start, const gsc_PedigreeID id)
Returns the name of the genotype with a given id.
void gsc_get_ids_of_names(const gsc_AlleleMatrix *start, const size_t n_names, const char **names, gsc_PedigreeID *output)
Search for genotypes with certain names in a linked list of gsc_AlleleMatrix and save the ids of thos...
unsigned int gsc_get_index_of_child(const gsc_AlleleMatrix *start, const gsc_PedigreeID parent1id, const gsc_PedigreeID parent2id)
Search for a genotype with parentage matching two given parent ids in a linked list of gsc_AlleleMatr...
unsigned int gsc_get_index_of_name(const gsc_AlleleMatrix *start, const char *name)
Search for a genotype with a particular name in a linked list of gsc_AlleleMatrix,...
gsc_PedigreeID gsc_get_id_of_index(const gsc_AlleleMatrix *start, const unsigned int index)
Get the id of a genotype by its index.
int gsc_get_parents_of_id(const gsc_AlleleMatrix *start, const gsc_PedigreeID id, gsc_PedigreeID output[static 2])
Saves the ids of the parents of a genotype with a particular id to the output array output.
#define AlleleMatrix
#define NO_EFFECTSET
#define NO_MAP
#define get_alleles
#define delete_bidirectional_iter
#define NA
#define IS_VALID_LOCATION
#define INVALID_GENO_LOCATION
#define GenoLocation
#define NO_PEDIGREE
#define RandomAccessIterator
#define NO_GROUP
#define SimData
#define FileFormatSpec
#define RecombinationMap
#define GSC_FINALISE_BUFFER(n, as, nentries)
Macro to convert a stretchy buffer to a solid heap vector.
gsc_GenotypeFileCellStyle
Represent possible representations of alleles at a marker in a genotype file.
gsc_TableFileReader gsc_tablefilereader_create(const char *filename)
Open a file for reading with gsc_TableFileReader.
#define GSC_NO_LABEL
Empty/null value for custom label identifiers.
#define GSC_STRETCH_BUFFER(n, newlen)
Macro to expand the capacity of a stretchy buffer.
void gsc_tablefilecell_deep_copy(gsc_TableFileCell *c)
Allocate memory to store a deep copy of a gsc_TableFileCell, if previously only a shallow copy.
void gsc_tablefilereader_close(gsc_TableFileReader *tbl)
Close a gsc_TableFileReader's file pointer.
#define GSC_DELETE_BUFFER(n)
Macro to delete a stretchy buffer.
#define GSC_NO_EFFECTSET
Empty/null value for effect set identifiers.
enum gsc_TableFileCurrentStatus gsc_helper_tablefilereader_classify_char(gsc_TableFileReader *tbl)
Classify the character under the cursor of a TableFileReader as cell contents or otherwise.
gsc_TableFileCell gsc_tablefilereader_get_next_cell(gsc_TableFileReader *tbl)
Read forwards in TableFileReader and return the next cell's contents, as well as how many column gaps...
gsc_TableFileCurrentStatus
Represent possible states of the cursor of a gsc_TableFileReader.
#define GSC_CREATE_BUFFER(n, type, length)
Macro to create a stretchy buffer of any type and some length.
const gsc_GenOptions GSC_BASIC_OPT
Default parameter values for GenOptions, to help with quick scripts and prototypes.
gsc_FileFormatSpec gsc_define_matrix_format_details(const GSC_LOGICVAL has_header, const GSC_LOGICVAL markers_as_rows, const enum gsc_GenotypeFileCellStyle cell_style)
Give genomicSimulation hints on the format of a genotype matrix file to be loaded.
void gsc_helper_tablefilereader_refill_buffer(gsc_TableFileReader *tbl)
Read another buffer's worth of characters from a gsc_TableFileReader's file.
#define GSC_NO_PEDIGREE
Empty/null value for pedigree fields.
gsc_GenotypeFileType
Enumerate types of genotype files that the simulation knows how to load.
#define GSC_NO_GROUP
Empty/null value for group allocations.
@ GSC_GENOTYPECELLSTYLE_SLASHPAIR
@ GSC_GENOTYPECELLSTYLE_PAIR
@ GSC_GENOTYPECELLSTYLE_UNKNOWN
@ GSC_GENOTYPECELLSTYLE_ENCODED
@ GSC_GENOTYPECELLSTYLE_COUNT
@ GSC_TABLEFILE_ERROR_EOF
@ GSC_TABLEFILE_ERROR_EOBUF
@ GSC_TABLEFILE_COLUMNGAP
@ GSC_TABLEFILE_NEWLINE
@ GSC_TABLEFILE_CONTENTS
@ GSC_GENOTYPEFILE_MATRIX
Either a marker-by-line matrix, where each marker is a row, or a line-by-marker matrix,...
@ GSC_GENOTYPEFILE_UNKNOWN
@ GSC_GENOTYPEFILE_VCF
@ GSC_GENOTYPEFILE_BED
@ GSC_GENOTYPEFILE_PED
gsc_GroupNum gsc_get_next_free_group_num(const size_t n_existing_groups, const gsc_GroupNum *existing_groups, size_t *cursor, gsc_GroupNum previous)
Iterator to get the next currently-free group number.
unsigned int gsc_get_from_ordered_pedigree_list(const gsc_PedigreeID target, const unsigned int listLen, const gsc_PedigreeID *list)
Binary search through list of unsigned integers.
gsc_GroupNum gsc_get_new_group_num(gsc_SimData *d)
Function to identify the next sequential integer that does not identify a group that currently has me...
void gsc_change_label_to(gsc_SimData *d, const gsc_GroupNum whichGroup, const gsc_LabelID whichLabel, const int setTo)
Set the values of a custom label.
struct gsc_TableSize gsc_get_file_dimensions(const char *filename, const char sep)
Opens a table file and reads the number of columns and rows (including headers) separated by sep into...
int gsc_get_integer_digits(const int i)
Count and return the number of digits in i.
unsigned int gsc_get_index_of_map(const gsc_SimData *d, const gsc_MapID map)
Function to identify the lookup index of a recombination map identifier.
unsigned int gsc_get_index_of_label(const gsc_SimData *d, const gsc_LabelID label)
Function to identify the label lookup index of a label identifier.
gsc_MapID gsc_get_new_map_id(const gsc_SimData *d)
Function to identify the next sequential integer that is not already allocated to a map ID in the sim...
void gsc_shuffle_up_to(rnd_pcg_t *rng, void *sequence, const size_t item_size, const size_t total_n, const size_t n_to_shuffle)
Produce a random ordering of the first n elements in an array using a (partial) Fisher-Yates shuffle.
void gsc_change_label_default(gsc_SimData *d, const gsc_LabelID whichLabel, const int newDefault)
Set the default value of a custom label.
size_t gsc_get_from_ordered_str_list(const char *target, const size_t listLen, const char **list)
Binary search through a list of strings.
size_t gsc_get_from_unordered_str_list(const char *target, const size_t listLen, const char **list)
Linear search through a list of strings.
gsc_EffectID gsc_get_new_eff_set_id(const gsc_SimData *d)
Function to identify the next sequential integer that is not already allocated to a marker effect set...
void gsc_get_n_new_group_nums(gsc_SimData *d, const size_t n, gsc_GroupNum *result)
Function to identify the next n sequential integers that do not identify a group that currently has m...
void gsc_change_label_to_values(gsc_SimData *d, const gsc_GroupNum whichGroup, const unsigned int startIndex, const gsc_LabelID whichLabel, const size_t n_values, const int *values)
Copy a vector of integers into a custom label.
gsc_LabelID gsc_get_new_label_id(const gsc_SimData *d)
Function to identify the next sequential integer that is not already allocated to a label in the simu...
unsigned int gsc_get_index_of_eff_set(const gsc_SimData *d, const gsc_EffectID eff_set_id)
Function to identify the lookup index of a marker effect set identifier.
gsc_LabelID gsc_create_new_label(gsc_SimData *d, const int setTo)
Initialises a new custom label.
void gsc_change_names_to_values(gsc_SimData *d, const gsc_GroupNum whichGroup, const unsigned int startIndex, const size_t n_values, const char **values)
Copy a vector of strings into the genotype name field.
_Bool gsc_get_index_of_genetic_marker(const char *target, gsc_KnownGenome g, unsigned int *out)
Return whether or not a marker name is present in the tracked markers, and at what index.
void gsc_change_allele_symbol(gsc_SimData *d, const char *which_marker, const char from, const char to)
Replace all occurrences of a given allele with a different symbol representation.
void gsc_change_label_by_amount(gsc_SimData *d, const gsc_GroupNum whichGroup, const gsc_LabelID whichLabel, const int byValue)
Increment the values of a custom label.
void gsc_condense_allele_matrix(gsc_SimData *d)
A function to tidy the internal storage of genotypes after addition or deletion of genotypes in the g...
unsigned int gsc_randomdraw_replacementrules(gsc_SimData *d, unsigned int max, unsigned int cap, unsigned int *member_uses, unsigned int noCollision)
Randomly pick a number in a range, optionally with a cap on how many times a number can be picked,...
static gsc_GroupNum gsc_helper_split_by_allocator_knowncounts(gsc_GenoLocation loc, gsc_SimData *d, void *datastore, size_t n_outgroups, size_t *subgroupsfound, gsc_GroupNum *outgroups)
static gsc_GroupNum gsc_helper_split_by_quality_halfsib2(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
static int gsc_helper_parentchooser_cross_randomly(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_random_crosses.
static void gsc_helper_genoptions_save_genotypes(FILE *fg, gsc_AlleleMatrix *tosave)
save-as-you-go (genotypes/alleles)
static GSC_LOGICVAL gsc_helper_parse_3cell_header(gsc_TableFileReader *tf, const char **canonical_titles, int *col_order, gsc_TableFileCell *unprocessedqueue, size_t *queuesize)
Header row reading and processing for map and effect set files.
static struct gsc_EmptyListNavigator gsc_create_emptylistnavigator(gsc_SimData *d, gsc_GroupNum allocation_group)
Create a new gsc_EmptyListNavigator, including an empty AlleleMatrix suitable for inserting into the ...
static FILE * gsc_helper_genoptions_save_pedigrees_setup(const gsc_GenOptions g)
Opens file for writing save-as-you-go pedigrees in accordance with gsc_GenOptions.
static void gsc_helper_output_genotypematrix_cell(FILE *f, gsc_GenoLocation loc, unsigned int markerix, void *GSC_NA)
Kernel for gsc_scaffold_save_genotype_info, when the goal is to save the (phased) allele pairs of eac...
static void gsc_helper_make_offspring_doubled_haploids(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for gsc_make_doubled_haploids.
static void gsc_helper_genoptions_save_bvs(FILE *fe, gsc_EffectMatrix *effMatrices, unsigned int effIndex, gsc_AlleleMatrix *tosave)
save-as-you-go (breeding values)
static int gsc_helper_parentchooser_cross_targeted(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_targeted_crosses.
static int gsc_helper_parentchooser_cross_randomly_between(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_random_crosses_between.
static FILE * gsc_helper_genoptions_save_genotypes_setup(const gsc_SimData *d, const gsc_GenOptions g)
Opens file for writing save-as-you-go genotypes in accordance with gsc_GenOptions.
static gsc_MapID gsc_helper_insert_recombmap_into_simdata(gsc_SimData *d, gsc_RecombinationMap map)
Save a RecombinationMap to the SimData and allocate it a mapID.
static void gsc_set_names(gsc_AlleleMatrix *a, const char *prefix, const int suffix, const unsigned int from_index)
Fills the designated section of the .names array in a gsc_AlleleMatrix with the pattern "`prefix`ind...
static gsc_EffectID gsc_helper_insert_eff_set_into_simdata(gsc_SimData *d, gsc_EffectMatrix effset)
Save an EffectMatrix to the SimData and allocate it an EffectID.
static struct gsc_GenotypeFile_MatrixFormat gsc_helper_genotypefile_matrix_detect_orientation(const gsc_SimData *d, const gsc_TableFileCell *cellqueue, const size_t firstrowlen, const size_t queuelen, struct gsc_GenotypeFile_MatrixFormat format, const char *filenameforlog)
Determine whether a genotype matrix is row- or column-oriented.
static void gsc_scaffold_save_genotype_info(FILE *f, gsc_BidirectionalIterator *targets, unsigned int n_markers, char **const marker_names, const _Bool markers_as_rows, void(*bodycell_printer)(FILE *, gsc_GenoLocation, unsigned int, void *), void *bodycell_printer_data)
Prints a matrix of genotype information to a file.
static int gsc_helper_ascending_pdouble_comparer(const void *pp0, const void *pp1)
Comparator function for qsort.
static struct gsc_GenotypeFile_MatrixFormat gsc_helper_genotypefile_matrix_detect_header(const gsc_TableFileCell *cellqueue, const size_t firstrowlen, const size_t queuelen, struct gsc_GenotypeFile_MatrixFormat format, const char *filenameforlog)
Determine whether a genotype matrix has a header row or not.
static void gsc_helper_make_offspring_self_n_times(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for gsc_self_n_times.
static int gsc_helper_descending_pdouble_comparer(const void *pp0, const void *pp1)
Comparator function for qsort.
static int gsc_helper_ascending_double_comparer(const void *pp0, const void *pp1)
Comparator function for qsort.
static void gsc_helper_ancestry_intprinter_file(long unsigned int i, void *data)
Kernel for scaffold functions that require printing an integer to a file (as opposed to saving the in...
static gsc_GenoLocation gsc_emptylistnavigator_get_first(struct gsc_EmptyListNavigator *it)
Reset the cursor of a gsc_EmptyListNavigator to the first genotype.
static void gsc_emptylistnavigator_finaliselist(struct gsc_EmptyListNavigator *it)
Push emptylist edited genotypes into the SimData.
static gsc_GenoLocation gsc_emptylistnavigator_get_next(struct gsc_EmptyListNavigator *it)
Get the next sequential genotype in a gsc_EmptyListNavigator.
static int gsc_helper_mapfileunit_ascending_d_comparer(const void *p0, const void *p1)
Comparator function for qsort.
static gsc_GroupNum gsc_helper_split_by_quality_individuate(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
static gsc_GenoLocation gsc_nextgappy_valid_pos(struct gsc_GappyIterator *it)
Sets the current cursor position in a gsc_GappyIterator to the next valid position,...
static gsc_GroupNum gsc_helper_split_by_quality_halfsib1(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
static GSC_LOGICVAL gsc_helper_is_marker_in_chr(const unsigned int markerix, const gsc_LinkageGroup chr, double *pos)
Check if a marker index is found in a particular LinkageGroup, and provide its distance along the chr...
static void gsc_helper_genotypecell_to_allelematrix(gsc_GenoLocation loc, unsigned int markerix, enum gsc_GenotypeFileCellStyle style, char *cell, gsc_SimData *forrng)
Parse a string and save it as the alleles of a genotype at a particular location and genetic marker.
static void gsc_scaffold_save_ancestry_of(const gsc_AlleleMatrix *m, gsc_PedigreeID p1, gsc_PedigreeID p2, void(*strprinter)(char *, size_t, void *), void(*intprinter)(long unsigned int, void *), void *printer_data)
Identifies and saves (recursively) the pedigree of a pair of parents.
static gsc_TableFileCell gsc_helper_tablefilereader_get_next_cell_wqueue(gsc_TableFileReader *tf, gsc_TableFileCell **queue, size_t *queuesize)
Return the next cell from a queue of cells until the queue is exhausted, and thereafter read new cell...
static int gsc_helper_parentchooser_cloning(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_clones.
static gsc_GroupNum gsc_helper_split_by_quality_halfsibtemplate(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results, gsc_PedigreeID(*getparent)(gsc_GenoLocation))
static gsc_GroupNum gsc_helper_split_by_allocator_equalprob(gsc_GenoLocation loc, gsc_SimData *d, void *datastore, size_t n_outgroups, size_t *subgroupsfound, gsc_GroupNum *outgroups)
static void gsc_helper_make_offspring_cross(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for all crossing functions
static void gsc_helper_genoptions_save_pedigrees(FILE *fp, gsc_SimData *d, gsc_AlleleMatrix *tosave)
save-as-you-go (pedigrees)
static int gsc_helper_mapfileunit_ascending_chr_comparer(const void *p0, const void *p1)
Comparator function for qsort.
static enum gsc_GenotypeFileCellStyle gsc_helper_genotype_matrix_identify_cell_style(gsc_TableFileCell c)
Identify what formatting a genotype matrix is representing alleles as.
static void * gsc_malloc_wrap(const size_t size, char exitonfail)
Replace calls to malloc direct with this function.
static int gsc_helper_parentchooser_selfing(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_self_n_times.
static void gsc_helper_make_offspring_clones(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for gsc_make_clones.
gsc_GroupNum gsc_make_n_crosses_from_top_m_percent(gsc_SimData *d, const int n, const int m, const gsc_GroupNum group, const gsc_MapID mapID, const gsc_EffectID effID, const gsc_GenOptions g)
static void gsc_helper_genoptions_give_names_and_ids(gsc_AlleleMatrix *am, gsc_SimData *d, const gsc_GenOptions g)
Apply gsc_GenOptions naming scheme and gsc_PedigreeID allocation to a single gsc_AlleleMatrix.
static FILE * gsc_helper_genoptions_save_bvs_setup(const gsc_SimData *d, const gsc_GenOptions g, unsigned int *effIndexp)
Opens file for writing save-as-you-go breeding values in accordance with gsc_GenOptions.
static void gsc_helper_sort_markerlist(unsigned int n_markers, struct gsc_MapfileUnit *markerlist)
Sort markerlist by chromosome name, and by position within each chromosome.
static struct gsc_GenotypeFile_MatrixFormat gsc_helper_genotypefile_matrix_detect_cellstyle(const gsc_TableFileCell *cellqueue, const size_t firstrowlen, const size_t queuelen, struct gsc_GenotypeFile_MatrixFormat format, const char *filenameforlog)
Determine the style in which alleles are stored in a genotype matrix.
static void gsc_helper_output_countmatrix_cell(FILE *f, gsc_GenoLocation loc, unsigned int markerix, void *data)
Kernel for gsc_scaffold_save_genotype_info, when the goal is to save the allele counts of a particula...
static gsc_GroupNum gsc_load_genotypefile_matrix(gsc_SimData *d, const char *filename, const gsc_FileFormatSpec format)
Loads a genotype file, with or without existing genome model in the SimData.
static int gsc_helper_indirect_alphabetical_str_comparer(const void *p0, const void *p1)
Comparator function for qsort.
static unsigned int gsc_helper_str_markerlist_leftjoin(gsc_KnownGenome g, unsigned int n_markers_in_list, struct gsc_MapfileUnit **markerlist)
Discard markers whose names are not present in a gsc_KnownGenome.
static size_t gsc_helper_parse_mapfile(const char *filename, struct gsc_MapfileUnit **out)
Extract the contents of a genetic map file.
static gsc_GenoLocation gsc_nextgappy_get_gap(struct gsc_GappyIterator *it)
Sets the current cursor position in a gsc_GappyIterator to the next empty position,...
static GSC_LOGICVAL gsc_helper_genotypefile_matrix_detect_cornercell_presence(const size_t ncellsfirstrow, const size_t ncellssecondrow, const _Bool secondrowheaderisempty)
Determine whether a genotype matrix has a corner cell or not.
static unsigned int gsc_helper_random_cross_checks(gsc_SimData *d, const gsc_GroupNum from_group, const unsigned int n_crosses, const unsigned int cap)
Check input parameters of random crossing functions.
static gsc_GenoLocation gsc_nextgappy_get_nongap(struct gsc_GappyIterator *it)
Sets the current cursor position in a gsc_GappyIterator to the next filled position,...
static void gsc_helper_ancestry_strprinter_file(char *str, size_t strlen, void *data)
Kernel for scaffold functions that require printing a string to a file (as opposed to saving the stri...
static gsc_GroupNum gsc_helper_split_by_allocator_unequalprob(gsc_GenoLocation loc, gsc_SimData *d, void *datastore, size_t n_outgroups, size_t *subgroupsfound, gsc_GroupNum *outgroups)
static gsc_GroupNum gsc_helper_split_by_quality_family(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
#define GSC_ID_T
genomicSimulation's "ID" type
#define GSC_NA_IDX
When accessing the current array index of a unique session ID, the "ID not found"/failure value is -1...
#define GSC_GLOBALX_T
genomicSimulation's "Candidate global index" type
#define GSC_GENOLEN_T
genomicSimulation's "Genotype length" type
GSC_LOGICVAL
genomicSimulation's "logical value" type
@ GSC_TRUE
@ GSC_FALSE
@ GSC_NA
#define GSC_NA_LOCALX
For candidate local indexes, the INVALID/UNINITIALISED value is -1 (for signed types) or the maximum ...
#define GSC_LOCALX_T
genomicSimulation's "Candidate local index" type
#define GSC_MALLOC(size)
#define GSC_NA_ID
For unique session IDs, the INVALID/UNINITIALISED value is 0.
#define GSC_NA_GLOBALX
For candidate global indexes, the INVALID/UNINITIALISED value is -1 (for signed types) or the maximum...
#define GSC_FREE(ptr)
#define CONTIG_WIDTH
#define NAME_LENGTH
char * names[1000]
Array of dynamically allocated strings containing the names of the lines/genotypes in this matrix.
unsigned int n_genotypes
Number of genotypes currently loaded in this matrix.
unsigned int n_markers
Number of markers across which genotypes are tracked.
char * alleles[1000]
A matrix of SNP markers by lines/genotypes containing pairs of alleles eg TT, TA.
unsigned int n_labels
Number of custom labels currently available to this gsc_AlleleMatrix.
gsc_AlleleMatrix * next
Pointer to the next gsc_AlleleMatrix in the linked list, or NULL if this entry is the last.
int ** labels
Pointer to list of labels.
gsc_PedigreeID ids[1000]
Unique ID for each genotype.
gsc_GroupNum groups[1000]
Group allocation of each genotype.
gsc_PedigreeID pedigrees[2][1000]
Two lists of integer IDs of the parents of this genotype (if tracked), or 0 if we don't know/care.
A structure to iterate forwards and backwards through all genotypes in a gsc_SimData or through only ...
_Bool atEnd
Boolean that is TRUE if the iterator's 'cursor' is on the last genotype (genotype with the highest in...
unsigned int cachedAMIndex
Index of cachedAM in the linked list of gsc_AlleleMatrix beginning at d->m.
const gsc_GroupNum group
Group through which to iterate.
gsc_AlleleMatrix * cachedAM
Pointer to the gsc_AlleleMatrix from the linked list of gsc_AlleleMatrix beginning at d->m where the ...
_Bool atStart
Boolean that is TRUE if the iterator's 'cursor' is on the first genotype (genotype with the lowest in...
unsigned int localPos
Local index (index within the cachedAM) of the genotype in the linked list of gsc_AlleleMatrix beginn...
gsc_AlleleMatrix * am
Simulation genotypes through which to iterate.
A row-major heap matrix that contains floating point numbers.
double ** matrix
The actual matrix and contents.
size_t cols
number of columns in the matrix
size_t rows
Number of rows in the matrix.
A type representing a particular loaded set of marker effects.
unsigned int id
A type that stores a matrix of effect values and their names.
char * effect_names
Character array containing allele characters ordered to match rows of effects.
gsc_DecimalMatrix effects
Effect on breeding value of alleles at markers.
A structure to hold an initially empty AlleleMatrix list whose genotypes can be accessed sequentially...
gsc_PedigreeID currentid
gsc_AlleleMatrix * firstAM
gsc_AlleleMatrix * localAM
File format specifier for the genotype input file.
union gsc_FileFormatSpec::@7 spec
struct gsc_GenotypeFile_MatrixFormat matrix
enum gsc_GenotypeFileType filetype
A structure to iterate forwards through all positions in the gsc_AlleleMatrix linked list in gsc_SimD...
gsc_GenoLocation cursor
unsigned int cursorAMIndex
A type that contains choices of settings for gsc_SimData functions that create a new gsc_AlleleMatrix...
_Bool will_allocate_ids
A boolean: whether to allocate generated offspring session- unique IDs.
_Bool will_track_pedigree
A boolean: whether to track parentage of generated offspring.
_Bool will_name_offspring
A boolean: whether generated offspring should be given names.
const char * filename_prefix
A string used in save-as-you-go file names.
const char * offspring_name_prefix
If will_name_offspring is true, generated offspring are named with the concatenation {offspring_name_...
gsc_EffectID will_save_bvs_to_file
If equal to NO_EFFECTSET, no bvs are calculated or saved.
_Bool will_save_pedigree_to_file
A boolean.
unsigned int family_size
The number of offspring to produce from each cross.
_Bool will_save_to_simdata
A boolean.
_Bool will_save_alleles_to_file
A boolean.
A gsc_AlleleMatrix/gsc_AlleleMatrix index coordinate of a particular genotype in the simulation.
gsc_AlleleMatrix * localAM
Pointer to the gsc_AlleleMatrix in which the genotype can be found.
unsigned int localPos
Index in the localAM where the genotype can be found (min value: 0).
Variants in the format of a genotype matrix file.
enum gsc_GenotypeFileCellStyle cell_style
< Boolean: Are genetic markers the rows of the matrix (GSC_TRUE) or the columns of the matrix (GSC_FA...
GSC_LOGICVAL markers_as_rows
< Boolean: Is the first row of the file a header row? (Note: genotype matrix files must have row head...
A type representing the identifier of a group of genotypes.
unsigned int num
A type that stores the genome structure used in simulation.
char ** marker_names
A vector of n_markers strings containing the names of markers, ordered according to their index in an...
gsc_RecombinationMap * maps
A vector of n_maps recombination maps, to use for simulating meiosis.
char *** names_alphabetical
A vector of n_markers pointers to names in marker_names, ordered in alphabetical order of the names.
unsigned int n_markers
The total number of markers.
unsigned int n_maps
The number of recombination maps currently stored.
gsc_MapID * map_ids
A vector of n_maps identifiers for each of the recombination maps currently stored.
A type representing a particular custom label.
unsigned int id
A generic store for a linkage group, used to simulate meiosis on a certain subset of markers.
gsc_ReorderedLinkageGroup reorder
enum gsc_LinkageGroup::gsc_LinkageGroupType type
gsc_SimpleLinkageGroup simple
union gsc_LinkageGroup::@6 map
A type representing a particular loaded recombination map.
unsigned int id
Unprocessed data for one marker (linkage group and position) loaded from a map file.
unsigned long chr
A struct used to store a set of blocks of markers.
unsigned int num_blocks
The number of blocks whose details are stored here.
unsigned int * num_markers_in_block
Pointer to a heap array of length num_blocks containing the number of markers that make up each block...
unsigned int ** markers_in_block
Pointer to a heap array of length num_blocks, each entry in which is a pointer to a heap array with l...
Simple crate that stores a GroupNum, a MapID, and an EffectID.
gsc_EffectID effSet
gsc_GroupNum group
gsc_GenoLocation loc
Location in the simulation where this parent is stored.
unsigned int mapindex
Index in d->genome.maps of the recombination map to use when producing gametes from this parent.
A type representing a program-lifetime-unique identifier for a genotype, to be used in tracking pedig...
unsigned int id
A structure to search and cache indexes of all genotypes in a gsc_SimData or of all the members of a ...
unsigned int cacheSize
Length in gsc_GenoLocations of cache
const gsc_GroupNum group
Group through which to iterate.
unsigned int largestCached
Local/group index (that is, index in cache) of the highest cell in cache that has been filled.
gsc_SimData * d
Simulation data through which to iterate.
unsigned int groupSize
If the number of genotypes in the simulation that fulfil the iterator's group criteria is known,...
gsc_GenoLocation * cache
Array iteratively updated with the known genotypes in the simulation that fulfil the group criteria o...
A type that stores linkage groups and crossover probabilities for simulating meiosis.
size_t n_chr
The number of chromosomes/linkage groups represented in the map.
gsc_LinkageGroup * chrs
Vector of n_chr linkage groups, one for each chromosome/linkage group in this recombination map.
unsigned int n_markers
The number of markers in this chromosome/linkage group.
double expected_n_crossovers
Expected value of the Poisson distribution from which the number of crossovers in this linkage group ...
unsigned int * marker_indexes
Array with n_markers entries.
double * dists
Array with n_markers entries, containing at position i the distance in centimorgans along the linkage...
Composite type that is used to run crossing simulations.
unsigned int n_labels
The number of custom labels in the simulation.
gsc_KnownGenome genome
A gsc_KnownGenome, which stores the information of known markers and linkage groups,...
gsc_LabelID * label_ids
The identifier number of each label in the simulation, in order of their lookup index.
gsc_EffectID * eff_set_ids
The identifier number of each set of allele effects in the simulation, ordered by their lookup index.
gsc_EffectMatrix * e
Array of n_eff_sets gsc_EffectMatrix, optional for the use of the simulation.
int * label_defaults
Array containing the default (birth) value of each custom label.
unsigned int n_groups
Number of groups currently existing in simulation.
unsigned int n_eff_sets
The number of sets of allele effects in the simulation.
gsc_PedigreeID current_id
Highest SimData-unique ID that has been generated so far.
rnd_pcg_t rng
Random number generator working memory.
gsc_AlleleMatrix * m
Pointer to a gsc_AlleleMatrix, which stores data and metadata of founders and simulated offspring.
unsigned int first_marker_index
The index of the first marker in this chromosome/linkage group in the simulation's corresponding gsc_...
unsigned int n_markers
The number of markers in this chromosome/linkage group.
double * dists
Array with n_markers entries, containing at position i the distance in centimorgans along the linkage...
double expected_n_crossovers
Expected value of the Poisson distribution from which the number of crossovers in this linkage group ...
Represent a cell read by a gsc_TableFileReader.
int predCol
since last read, how many column gaps have there been?
char * cell
deep copy of the cell contents, or NULL
_Bool isCellShallow
is the string in 'cell' a shallow copy or deep copy?
int predNewline
since last read, how many newlines have there been?
_Bool eof
are we (this cell) at end of file?
size_t cell_len
length of cell contents (because a shallow copy may not be null-terminated)
Stream reader for files of some tabular format.
int buf_fill
Number of characters from the file that are currently loaded in buf.
char buf[8192]
A window of characters from the file, loaded into memory for current processing.
int cursor
Index in buf of the first character that the file reader has not yet parsed.
FILE * fp
File being read.
struct gsc_datastore_make_genotypes::@5 clones
struct gsc_datastore_make_genotypes::@3 selfing
struct gsc_datastore_make_genotypes::@1 rand_btwn
struct gsc_datastore_make_genotypes::@0 rand
struct gsc_datastore_make_genotypes::@2 targeted
struct gsc_datastore_make_genotypes::@4 doub_haps