genomicSimulationC 0.3
sim-operations.c
Go to the documentation of this file.
1#ifndef SIM_OPERATIONS
2#define SIM_OPERATIONS
3#include "sim-operations.h"
4/* genomicSimulationC v0.3 - last edit 23 July 2025 */
5
12 .offspring_name_prefix = NULL,
13 .family_size = 1,
14 .will_track_pedigree = GSC_FALSE,
15 .will_allocate_ids = GSC_TRUE,
16 .filename_prefix = NULL,
17 .will_save_pedigree_to_file = GSC_FALSE,
18 .will_save_bvs_to_file = GSC_NO_EFFECTSET,
19 .will_save_alleles_to_file = GSC_FALSE,
20 //.will_save_recombinations_to_file = GSC_FALSE,
21 .will_save_to_simdata = GSC_TRUE
22};
23
/** Allocate `size` bytes through GSC_MALLOC, reporting problems on stderr.
 *
 * @param size number of bytes requested. A request for 0 bytes is reported
 * and answered with NULL without calling the allocator.
 * @param exitonfail if nonzero, a failed allocation ends the program with
 * exit status 2; if zero, the failure is only reported and NULL is returned.
 * @return the allocated block, or NULL for a zero-size request or for a
 * failed allocation when `exitonfail` is zero.
 */
static void* gsc_malloc_wrap(const size_t size, char exitonfail) {
    if (size == 0) {
        fputs("0 memory allocation requested.\n", stderr);
        return NULL;
    }

    void* block = GSC_MALLOC(size);
    if (block == NULL) {
        // Same two messages as always; only the fatal variant stops the program.
        fputs(exitonfail ? "Memory allocation failed. Exiting.\n"
                         : "Memory allocation failed.\n",
              stderr);
        if (exitonfail) {
            exit(2);
        }
    }
    return block;
}
49
63 const GSC_ID_T n_labels,
64 const int* labelDefaults,
65 const GSC_LOCALX_T n_genotypes) {
67
68 m->n_genotypes = n_genotypes;
69 m->n_markers = n_markers;
70 m->n_labels = n_labels;
71 //m->alleles = gsc_malloc_wrap(sizeof(char*) * CONTIG_WIDTH);
72 for (GSC_LOCALX_T i = 0; i < n_genotypes; ++i) {
73 m->alleles[i] = gsc_malloc_wrap(sizeof(char) * (n_markers<<1),GSC_TRUE);
74 memset(m->alleles[i], 0, sizeof(char) * (n_markers<<1));
75 //m->ids[i] = 0;
76 }
77 memset(m->alleles + n_genotypes, 0, sizeof(char*) * (CONTIG_WIDTH - n_genotypes)); // setting the pointers to NULL
78
79 if (n_labels > 0) {
80 m->labels = gsc_malloc_wrap(sizeof(int*) * n_labels,GSC_TRUE);
81 for (GSC_ID_T i = 0; i < n_labels; ++i) {
82 m->labels[i] = gsc_malloc_wrap(sizeof(int) * CONTIG_WIDTH,GSC_TRUE);
83 for (GSC_LOCALX_T j = 0; j < CONTIG_WIDTH; ++j) {
84 m->labels[i][j] = labelDefaults[i];
85 }
86 }
87 } else if (n_labels == 0) {
88 m->labels = NULL;
89 } else {
90 fprintf(stderr, "Invalid negative number of labels provided to gsc_create_empty_allelematrix");
91 m->labels = NULL;
92 }
93
94 memset(m->ids, 0, sizeof(gsc_PedigreeID) * CONTIG_WIDTH);
95 memset(m->pedigrees[0], 0, sizeof(gsc_PedigreeID) * CONTIG_WIDTH);
96 memset(m->pedigrees[1], 0, sizeof(gsc_PedigreeID) * CONTIG_WIDTH);
97 memset(m->groups, 0, sizeof(gsc_GroupNum) * CONTIG_WIDTH);
98 memset(m->names, 0, sizeof(char*) * CONTIG_WIDTH); // setting the pointers to NULL
99
100 m->next = NULL;
101
102 return m;
103}
104
114 d->n_labels = 0;
115 d->label_ids = NULL;
116 d->label_defaults = NULL;
117 d->genome.n_markers = 0;
118 d->genome.marker_names = NULL;
119 d->genome.names_alphabetical = NULL;
120 d->genome.n_maps = 0;
121 d->genome.map_ids = NULL;
122 d->genome.maps = NULL;
123 d->m = NULL;
124 d->n_eff_sets = 0;
125 d->e = NULL;
126 rnd_pcg_seed( &d->rng, RNGseed );
128 d->n_groups = 0;
129 return d;
130}
131
143 if (d == NULL) return;
144 // Free label defaults
145 if (d->n_labels > 0) {
146 if (d->label_ids != NULL) {
148 }
149 if (d->label_defaults != NULL) {
151 }
152 }
153
154 // Free other details
156 for (GSC_ID_T i = 0; i < d->n_eff_sets; ++i) {
157 gsc_delete_effects_table(&(d->e[i]));
158 }
159 if (d->n_eff_sets > 0) {
161 for (GSC_ID_T i = 0; i < d->n_eff_sets; ++i) {
162 gsc_delete_effects_table(&(d->e[i]));
163 }
164 GSC_FREE(d->e);
165 }
167
168 // Clear all values but the RNG
169 d->n_labels = 0;
170 d->label_ids = NULL;
171 d->label_defaults = NULL;
172 d->genome.n_markers = 0;
173 d->genome.marker_names = NULL;
174 d->genome.n_maps = 0;
175 d->genome.map_ids = NULL;
176 d->genome.maps = NULL;
177 d->m = NULL;
178 d->n_eff_sets = 0;
179 d->eff_set_ids = NULL;
180 d->e = NULL;
181 // d->rng
183 d->n_groups = 0;
184}
185
186/*-------------------------Random generators---------------------------------*/
187
188/* https://www.everything2.com/title/Generating+random+numbers+with+a+Poisson+distribution
189https://en.wikipedia.org/wiki/Poisson_distribution#Generating_Poisson-distributed_random_variables
190*/
201int gsc_randpoi(rnd_pcg_t* rng, double lambda) {
202 if (lambda <= 0) { // invalid parameter.
203 //In this case we use the function to generate number of crossovers
204 // so if parameter/length passed in is invalid, we just want no crossovers
205 return 0;
206 }
207
208 int k = 0;
209 double target = exp(-lambda);
210 double p = rnd_pcg_nextf(rng);
211 while (p > target) {
212 k += 1;
213 p *= rnd_pcg_nextf(rng);
214 }
215 return k;
216}
217
218/*end random generators*/
219
220/*------------------------Supporter Functions--------------------------------*/
221
240struct gsc_TableSize gsc_get_file_dimensions(const char* filename, const char sep) {
241 struct gsc_TableSize details;
242 details.num_columns = 0;
243 details.num_rows = 0;
244
245 FILE* fp;
246 int c; // this is used to store the output of fgetc i.e. the next character in the file
247 if ((fp = fopen(filename, "r")) == NULL) {
248 fprintf(stderr, "Failed to open file %s.\n", filename); exit(1);
249 }
250 c = fgetc(fp);
251
252 while (c != EOF && c != '\n') {
253 //RPACKINSERT R_CheckUserInterrupt();
254 if (c == sep) {
255 details.num_columns += 1; // add count for columns of form [colname]sep
256 }
257 c = fgetc(fp);
258 }
259
260 details.num_columns += 1; // add another column that was bounded by sep[colname][EOF or \n]
261 details.num_rows = 1; // we successfully got the first row
262
263 // now get all the rows. What we care about in the rows is the number of them
264 c = fgetc(fp);
265 int sep_count = 0; // for each row, count the columns to make sure they match and the file is valid
266 int has_length = GSC_FALSE;
267 while (c != EOF) {
268 //RPACKINSERT R_CheckUserInterrupt();
269 if (c == '\n') {
270 details.num_rows += 1; // add count for columns of form [colname]sep
271
272 // check we have right number of columns and reset counter
273 if (has_length && sep_count != details.num_columns-1) {
274 // we have a bad number of columns
275 details.num_columns = 0;
276 fclose(fp);
277 fprintf(stderr, "Bad columns on row %d\n", details.num_rows + 1); exit(1);
278 }
279 sep_count = 0;
280 has_length = GSC_FALSE;
281
282 } else if (c == sep) {
283 sep_count += 1;
284 } else if (has_length == GSC_FALSE) {
285 has_length = GSC_TRUE;
286 }
287 c = fgetc(fp);
288 }
289 if (has_length) {
290 details.num_rows += 1; // for the last row before EOF
291 }
292
293 fclose(fp);
294 return details;
295}
296
318/*int gsc_get_from_ordered_uint_list(const unsigned int target,
319 const unsigned int listLen,
320 const unsigned int* list) {
321 unsigned int first = 0, last = listLen - 1;
322 int index = (first + last) / 2;
323 while (list[index] != target && first <= last) {
324 if (list[index] == 0) {
325 int lookahead = 1;
326 while(1) {
327 if (index+lookahead <= last && list[index+lookahead] != 0) {
328 if (list[index+lookahead] == target) {
329 return index+lookahead;
330 } else if (list[index+lookahead] < target) {
331 first = index+lookahead + 1;
332 break;
333 } else {
334 last = index - 1;
335 break;
336 }
337 } else if (index-lookahead <= last && list[index-lookahead] != 0) {
338 if (list[index-lookahead] == target) {
339 return index-lookahead;
340 } else if (list[index-lookahead] < target) {
341 first = index + 1;
342 break;
343 } else {
344 last = index-lookahead - 1;
345 break;
346 }
347 }
348 ++lookahead;
349 if (index+lookahead <= last || index-lookahead >= first) {
350 // failed to find any nonzeros between first and last
351 return -1;
352 }
353 }
354
355 } else { // No need to dodge 0. Normal binary search.
356 if (list[index] == target) {
357 return index;
358 } else if (list[index] < target) {
359 first = index + 1;
360 } else {
361 last = index - 1;
362 }
363
364 }
365 // index has been updated, no matter the branch.
366 index = (first + last) / 2;
367 }
368
369 if (first > last) {
370 return -1;
371 }
372 return index;
373}*/
374
397 const GSC_LOCALX_T listLen,
398 const gsc_PedigreeID* list) {
399 GSC_LOCALX_T first = 0, last = listLen - 1;
400 GSC_LOCALX_T index = (first + last) / 2;
401 while (list[index].id != target.id && first <= last) {
402 if (list[index].id == GSC_NO_PEDIGREE.id) {
403 int lookahead = 1;
404 while(1) {
405 if (index+lookahead <= last && list[index+lookahead].id != GSC_NO_PEDIGREE.id) {
406 if (list[index+lookahead].id == target.id) {
407 return index+lookahead;
408 } else if (list[index+lookahead].id < target.id) {
409 first = index+lookahead + 1;
410 break;
411 } else {
412 last = index - 1;
413 break;
414 }
415 } else if (index-lookahead <= last && list[index-lookahead].id != GSC_NO_PEDIGREE.id) {
416 if (list[index-lookahead].id == target.id) {
417 return index-lookahead;
418 } else if (list[index-lookahead].id < target.id) {
419 first = index + 1;
420 break;
421 } else {
422 last = index-lookahead - 1;
423 break;
424 }
425 }
426 ++lookahead;
427 if (index+lookahead <= last || index-lookahead >= first) {
428 // failed to find any nonzeros between first and last
429 return GSC_NA_LOCALX;
430 }
431 }
432
433 } else { // No need to dodge 0. Normal binary search.
434 if (list[index].id == target.id) {
435 return index;
436 } else if (list[index].id < target.id) {
437 first = index + 1;
438 } else {
439 last = index - 1;
440 }
441
442 }
443 // index has been updated, no matter the branch.
444 index = (first + last) / 2;
445 }
446
447 if (first > last) {
448 return GSC_NA_LOCALX;
449 }
450 return index;
451}
452
/** Linear search for a string in an array of strings.
 *
 * @param target the string to look for (compared with strcmp).
 * @param listLen number of entries in `list`.
 * @param list the array of strings to search, in any order.
 * @return index of the first entry equal to `target`, or SIZE_MAX if no
 * entry matches.
 */
size_t gsc_get_from_unordered_str_list(const char* target,
                                       const size_t listLen,
                                       const char** list) {
    size_t pos = 0;
    while (pos < listLen && strcmp(list[pos], target) != 0) {
        ++pos;
    }
    // Running off the end of the list means nothing matched.
    return (pos < listLen) ? pos : SIZE_MAX;
}
479
/** Binary search for a string in an array of strings sorted in ascending
 * strcmp order.
 *
 * The search runs over the half-open range [low, high), so an empty list and
 * a target smaller than every entry are handled without unsigned wrap-around,
 * and no entry outside the list is ever read.
 *
 * @param target the string to look for (compared with strcmp).
 * @param listLen number of entries in `list`. May be 0.
 * @param list the array of strings to search; must be sorted so that
 * strcmp(list[i], list[i+1]) <= 0 for every adjacent pair.
 * @return index of an entry equal to `target`, or SIZE_MAX if there is none.
 */
size_t gsc_get_from_ordered_str_list(const char* target,
                                     const size_t listLen,
                                     const char** list) {
    size_t low = 0;
    size_t high = listLen; // one past the last candidate

    while (low < high) {
        const size_t mid = low + (high - low) / 2;
        const int comparison = strcmp(target, list[mid]);
        if (comparison == 0) {
            return mid;
        } else if (comparison > 0) {
            // target sorts after list[mid]: continue in the upper half
            low = mid + 1;
        } else {
            // target sorts before list[mid]: continue in the lower half
            high = mid;
        }
    }

    return SIZE_MAX; // did not find a match.
}
521
522
541void gsc_shuffle_up_to(rnd_pcg_t* rng,
542 void* sequence,
543 const size_t item_size,
544 const size_t total_n,
545 const size_t n_to_shuffle) {
546 if (n_to_shuffle > 1) {
547
548 size_t tmp_spot;
549 void* tmp = &tmp_spot;
550 if (item_size > sizeof(tmp_spot)) {
551 tmp = gsc_malloc_wrap(item_size, GSC_TRUE);
552 }
553
554 size_t maxi = total_n > n_to_shuffle ? n_to_shuffle - 1 : total_n - 1;
555 size_t i;
556 for (i = 0; i <= maxi; ++i) {
557 // items before i are already shuffled
558 size_t j = i + rnd_pcg_range(rng,0,total_n - i - 1);
559
560 // add the next chosen value to the end of the shuffle
561 memcpy(&tmp, sequence + j*item_size, item_size);
562 memcpy(sequence + j*item_size, sequence + i*item_size, item_size);
563 memcpy(sequence + i*item_size, &tmp, item_size);
564 }
565
566 if (item_size > sizeof(tmp_spot)) {
567 free(tmp);
568 }
569 }
570}
571
587 const char* prefix,
588 const int suffix,
589 const GSC_LOCALX_T from_index) {
590 char sname[NAME_LENGTH];
591 char format[NAME_LENGTH];
592 if (prefix == NULL) {
593 // make it an empty string instead, so it is not displayed as (null)
594 prefix = "";
595 }
596 // use sname to save the number of digits to pad by:
597 sprintf(sname, "%%0%dd", gsc_get_integer_digits(a->n_genotypes - from_index)); // Creates: %0[n]d
598 sprintf(format, "%s%s", prefix, sname);
599
600 int livingsuffix = suffix;
601 ++livingsuffix;
602 for (GSC_LOCALX_T i = from_index; i < a->n_genotypes; ++i) {
603 // clear name if it's pre-existing
604 if (a->names[i] != NULL) {
605 GSC_FREE(a->names[i]);
606 }
607
608 // save new name
609 sprintf(sname, format, livingsuffix);
610 a->names[i] = gsc_malloc_wrap(sizeof(char) * (strlen(sname) + 1),GSC_TRUE);
611 strcpy(a->names[i], sname);
612
613 ++livingsuffix;
614 }
615}
616
629 // Add new label default
630 if (d->n_labels == 0) {
632 d->label_ids[0] = (gsc_LabelID){.id=1};
633
634 d->label_defaults = gsc_malloc_wrap(sizeof(int) * 1,GSC_TRUE);
635 d->label_defaults[0] = setTo;
636
637 } else if (d->n_labels > 0) {
638
639 gsc_LabelID* new_label_ids;
640 if (d->label_ids != NULL) {
641 new_label_ids = gsc_malloc_wrap(sizeof(gsc_LabelID) * (d->n_labels + 1),GSC_TRUE);
642 memcpy(new_label_ids,d->label_ids,sizeof(gsc_LabelID)*d->n_labels);
643 new_label_ids[d->n_labels] = gsc_get_new_label_id(d);
645
646 } else { // d->label_ids == NULL
647 // If the other labels do not have identifiers, they're corrupted and
648 // deserve to be destroyed.
649 new_label_ids = gsc_malloc_wrap(sizeof(gsc_LabelID) * 1,GSC_TRUE);
650 d->n_labels = 0;
651 new_label_ids[d->n_labels] = gsc_get_new_label_id(d);
652 }
653 d->label_ids = new_label_ids;
654
655 int* new_label_defaults = gsc_malloc_wrap(sizeof(int) * (d->n_labels + 1),GSC_TRUE);
656 if (d->label_defaults != NULL) {
657 for (GSC_ID_T i = 0; i < d->n_labels; ++i) {
658 new_label_defaults[i] = d->label_defaults[i];
659 }
661 } else if (d->n_labels > 0) {
662 memset(new_label_defaults, 0, sizeof(int) * d->n_labels);
663 }
664 new_label_defaults[d->n_labels] = setTo;
665 d->label_defaults = new_label_defaults;
666
667 } else {
668 fprintf(stderr, "Labels malformed; gsc_SimData may be corrupted\n");
669 return (gsc_LabelID){.id=GSC_NA_ID};
670 }
671 d->n_labels += 1;
672
673 // Set all values of that label to the default
674 gsc_AlleleMatrix* m = d->m;
675 int warned = GSC_FALSE;
676 while (m != NULL) {
677 // Do we need to destroy the extant label table? happens if label_ids were missing and we discarded them
678 if (m->n_labels != d->n_labels - 1 && m->labels != NULL) {
679 for (GSC_ID_T i = 0; i < m->n_labels; ++i) {
680 GSC_FREE(m->labels[i]);
681 }
682 GSC_FREE(m->labels);
683 m->labels = NULL;
684 }
685
686 m->n_labels = d->n_labels;
687
688 // Consider the case when we need to expand the label list
689 if (m->n_labels > 1 && m->labels != NULL) {
690 GSC_ID_T newLabel = m->n_labels - 1;
691
692 // Create label list
693 int** oldLabelList = m->labels;
694 m->labels = gsc_malloc_wrap(sizeof(int*) * m->n_labels,GSC_TRUE);
695 for (GSC_ID_T i = 0; i < m->n_labels - 1; ++i) {
696 m->labels[i] = oldLabelList[i];
697 }
698 m->labels[newLabel] = gsc_malloc_wrap(sizeof(int) * CONTIG_WIDTH,GSC_TRUE);
699 GSC_FREE(oldLabelList);
700
701 // Set labels
702 if (setTo == 0) {
703 memset(m->labels[newLabel], 0, sizeof(int) * CONTIG_WIDTH);
704 } else {
705 for (GSC_LOCALX_T i = 0; i < CONTIG_WIDTH; ++i) {
706 m->labels[newLabel][i] = setTo;
707 }
708 }
709
710 // Consider the case we need to initialise the label list
711 } else if (m->n_labels == 1 && m->labels == NULL) {
712 // Create the label list
713 m->labels = gsc_malloc_wrap(sizeof(int*) * 1,GSC_TRUE);
714 m->labels[0] = gsc_malloc_wrap(sizeof(int) * CONTIG_WIDTH,GSC_TRUE);
715
716 // Set labels
717 if (setTo == 0) {
718 memset(m->labels[0], 0, sizeof(int) * CONTIG_WIDTH);
719 } else {
720 for (GSC_LOCALX_T i = 0; i < CONTIG_WIDTH; ++i) {
721 m->labels[0][i] = setTo;
722 }
723 }
724
725 } else if (!warned) {
726 fprintf(stderr, "Unable to create new label for all genotypes; gsc_SimData may be corrupted\n");
727 warned = GSC_TRUE;
728 }
729
730 m = m->next;
731 }
732 return d->label_ids[d->n_labels - 1];
733}
734
747 const gsc_LabelID whichLabel,
748 const int newDefault) {
749 GSC_ID_T labelIndex;
750 if (whichLabel.id == GSC_NO_LABEL.id || (labelIndex = gsc_get_index_of_label(d, whichLabel)) == GSC_NA_IDX) {
751 fprintf(stderr, "Nonexistent label %lu\n", (long unsigned int) whichLabel.id);
752 return;
753 }
754 d->label_defaults[labelIndex] = newDefault;
755}
756
773 const gsc_GroupNum whichGroup,
774 const gsc_LabelID whichLabel,
775 const int setTo) {
776 GSC_ID_T labelIndex;
777 if (whichLabel.id == GSC_NO_LABEL.id || (labelIndex = gsc_get_index_of_label(d, whichLabel)) == GSC_NA_IDX) {
778 fprintf(stderr, "Nonexistent label %lu\n", (long unsigned int) whichLabel.id);
779 return;
780 }
781 // Risks: if m->labels or m->labels[i] don't exist for labels where they should,
782 // will get some out of bounds accesses.
783
784 gsc_AlleleMatrix* m = d->m;
785 if (whichGroup.num != GSC_NO_GROUP.num) { // set the labels of group members
786 while (m != NULL) {
787
788 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
789 if (m->groups[i].num == whichGroup.num) {
790 m->labels[labelIndex][i] = setTo;
791 }
792 }
793 m = m->next;
794
795 }
796
797 } else { // whichGroup == 0 so set the labels of all genotypes
798 while (m != NULL) {
799
800 if (setTo == 0) {
801 memset(m->labels[labelIndex], 0, sizeof(int) * m->n_genotypes);
802 } else {
803 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
804 m->labels[labelIndex][i] = setTo;
805 }
806 }
807 m = m->next;
808 }
809 }
810}
811
830 const gsc_GroupNum whichGroup,
831 const gsc_LabelID whichLabel,
832 const int byValue) {
833 GSC_ID_T labelIndex;
834 if (whichLabel.id == GSC_NO_LABEL.id || (labelIndex = gsc_get_index_of_label(d, whichLabel)) == GSC_NA_IDX) {
835 fprintf(stderr, "Nonexistent label %lu\n", (long unsigned int) whichLabel.id);
836 return;
837 }
838 // Risks: if m->labels or m->labels[i] don't exist for labels where they should,
839 // will get some out of bounds accesses.
840
841 gsc_AlleleMatrix* m = d->m;
842 if (whichGroup.num != GSC_NO_GROUP.num) { // set the labels of group members
843 while (m != NULL) {
844
845 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
846 if (m->groups[i].num == whichGroup.num) {
847 m->labels[labelIndex][i] += byValue;
848 }
849 }
850 m = m->next;
851 }
852
853 } else { // whichGroup == 0 so set the labels of all genotypes
854 while (m != NULL) {
855
856 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
857 m->labels[labelIndex][i] += byValue;
858 }
859 m = m->next;
860 }
861 }
862
863}
864
889 const gsc_GroupNum whichGroup,
890 const GSC_GLOBALX_T startIndex,
891 const gsc_LabelID whichLabel,
892 const size_t n_values,
893 const int* values) {
894 GSC_ID_T labelIndex;
895 if (whichLabel.id == GSC_NO_LABEL.id || (labelIndex = gsc_get_index_of_label(d, whichLabel)) == GSC_NA_IDX) {
896 fprintf(stderr, "Nonexistent label %lu\n", (long unsigned int) whichLabel.id);
897 return;
898 }
899
900 gsc_AlleleMatrix* m = d->m;
901 GSC_GLOBALX_T currentIndex = 0;
902 if (whichGroup.num != GSC_NO_GROUP.num) { // set the labels of group members
903 // First scan through to find firstIndex
904 while (m != NULL) {
905
906 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
907 if (m->groups[i].num == whichGroup.num) {
908 // Update label if it is between startIndex and startIndex + n_values
909 if (currentIndex >= startIndex) {
910 m->labels[labelIndex][i] = values[currentIndex - startIndex];
911 }
912 currentIndex++;
913 if (currentIndex > startIndex && currentIndex - startIndex >= n_values) {
914 return;
915 }
916 }
917 }
918 m = m->next;
919 }
920
921 } else { // whichGroup == 0 so set the labels of all genotypes
922 while (m != NULL) {
923
924 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
925 // Update label if it is between startIndex and startIndex + n_values
926 if (currentIndex >= startIndex) {
927 m->labels[labelIndex][i] = values[currentIndex - startIndex];
928 }
929 currentIndex++;
930 if (currentIndex > startIndex && currentIndex - startIndex >= n_values) {
931 return;
932 }
933 }
934 m = m->next;
935 }
936 }
937}
938
967 const gsc_GroupNum whichGroup,
968 const GSC_GLOBALX_T startIndex,
969 const size_t n_values,
970 const char** values) {
971 // this will be much improved once we can hash our names.
972
973 gsc_AlleleMatrix* m = d->m;
974 GSC_GLOBALX_T currentIndex = 0;
975 if (whichGroup.num != GSC_NO_GROUP.num) { // set the names of group members
976 // First scan through to find firstIndex
977 while (m != NULL) {
978
979 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
980 if (m->groups[i].num == whichGroup.num) {
981 // Update name if index is between startIndex and startIndex + n_values
982 if (currentIndex >= startIndex) {
983 // clear name if it's pre-existing
984 if (m->names[i] != NULL) {
985 GSC_FREE(m->names[i]);
986 }
987
988 // save new name
989 const GSC_GLOBALX_T whichName = currentIndex - startIndex;
990 m->names[i] = gsc_malloc_wrap(sizeof(char) * (strlen(values[whichName]) + 1),GSC_TRUE);
991 strcpy(m->names[i], values[whichName]);
992 }
993 currentIndex++;
994 if (currentIndex > n_values) {
995 return;
996 }
997 }
998 }
999 m = m->next;
1000
1001 }
1002
1003 } else { // whichGroup == 0 so set the names of all genotypes
1004 while (m != NULL) {
1005
1006 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
1007 // Update name if it is between startIndex and startIndex + n_values
1008 if (currentIndex >= startIndex) {
1009 // clear name if it's pre-existing
1010 if (m->names[i] != NULL) {
1011 GSC_FREE(m->names[i]);
1012 }
1013
1014 // save new name
1015 const GSC_GLOBALX_T whichName = currentIndex - startIndex;
1016 const int nameLen = strlen(values[whichName]);
1017 m->names[i] = gsc_malloc_wrap(sizeof(char) * (nameLen + 1),GSC_TRUE);
1018 strncpy(m->names[i], values[whichName], nameLen);
1019 }
1020 currentIndex++;
1021 if (currentIndex > n_values) {
1022 return;
1023 }
1024 }
1025 m = m->next;
1026
1027 }
1028 }
1029}
1030
1054 const char* which_marker,
1055 const char from,
1056 const char to) {
1057 GSC_GENOLEN_T nmarkers = 0;
1058 GSC_GLOBALX_T ngenos = 0;
1059 unsigned int nalleles = 0;
1060
1061 GSC_GENOLEN_T markeri;
1062 if (which_marker == NULL) {
1065
1066 while (IS_VALID_LOCATION(loc)) {
1067 for (GSC_GENOLEN_T m = 0; m < d->genome.n_markers; ++m) {
1068 if (from == loc.localAM->alleles[loc.localPos][m << 1]) {
1069 loc.localAM->alleles[loc.localPos][m << 1] = to;
1070 ++nalleles;
1071 ++ngenos;
1072 }
1073 if (from == loc.localAM->alleles[loc.localPos][(m << 1) + 1]) {
1074 loc.localAM->alleles[loc.localPos][(m << 1) + 1] = to;
1075 ++nalleles;
1076 if (loc.localAM->alleles[loc.localPos][m << 1] !=
1077 loc.localAM->alleles[loc.localPos][(m << 1) + 1]) {
1078 ++ngenos;
1079 }
1080 }
1081 }
1082
1083 loc = gsc_next_forwards(&it);
1084 }
1085
1087
1088
1089 } else if (gsc_get_index_of_genetic_marker(which_marker, d->genome, &markeri)) {
1090 nmarkers = 1;
1093 while (IS_VALID_LOCATION(loc)) {
1094 if (from == loc.localAM->alleles[loc.localPos][markeri << 1]) {
1095 loc.localAM->alleles[loc.localPos][markeri << 1] = to;
1096 ++nalleles;
1097 ++ngenos;
1098 }
1099 if (from == loc.localAM->alleles[loc.localPos][(markeri << 1) + 1]) {
1100 loc.localAM->alleles[loc.localPos][(markeri << 1) + 1] = to;
1101 ++nalleles;
1102 if (loc.localAM->alleles[loc.localPos][markeri << 1] !=
1103 loc.localAM->alleles[loc.localPos][(markeri << 1) + 1]) {
1104 ++ngenos;
1105 }
1106 }
1107
1108 loc = gsc_next_forwards(&it);
1109 }
1110
1112
1113 } else {
1114 nmarkers = 0;
1115 ngenos = 0;
1116 }
1117
1118 printf("Changed allele %c to %c %lu times across %lu markers and %lu genotypes\n",
1119 from, to, (long unsigned int)nalleles, (long unsigned int)nmarkers, (long unsigned int)ngenos);
1120}
1121
1144 const gsc_EffectID effset,
1145 const GSC_GENOLEN_T n_values,
1146 const double* values) {
1147 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effset);
1148 if (effIndex == GSC_NA_IDX) {
1149 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effset.id);
1150 return 0;
1151 }
1152
1153 if (n_values != d->e[effIndex].n_markers) {
1154 fprintf(stderr,"Cannot use these values as marker effect centres because the number of values is not equal to the number of markers in the effect set\n");
1155 return 0;
1156 }
1157
1158 if (d->e[effIndex].centre == NULL) {
1159 d->e[effIndex].centre = gsc_malloc_wrap(sizeof(*d->e[effIndex].centre)*d->e[effIndex].n_markers, GSC_TRUE);
1160 }
1161 memcpy(d->e[effIndex].centre, values, sizeof(*d->e[effIndex].centre)*d->e[effIndex].n_markers);
1162 return 1;
1163}
1164
1187 const gsc_EffectID effset,
1188 const GSC_GENOLEN_T n_markers,
1189 const char** marker_names,
1190 const double* centres) {
1191 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effset);
1192 if (effIndex == GSC_NA_IDX) {
1193 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effset.id);
1194 return 0;
1195 }
1196
1197 gsc_MarkerEffects* e = d->e + effIndex;
1198 if (e->centre == NULL) {
1199 e->centre = gsc_malloc_wrap(sizeof(*e->centre)*e->n_markers, GSC_TRUE);
1200 for (GSC_GENOLEN_T i = 0; i < e->n_markers; ++i) {
1201 e->centre[i] = 0.;
1202 }
1203 }
1204
1205 GSC_GENOLEN_T successes = 0;
1206 for (GSC_GENOLEN_T ix = 0; ix < n_markers; ++ix) {
1207 GSC_GENOLEN_T markerix;
1208 if (gsc_get_index_of_genetic_marker(marker_names[ix], d->genome, &markerix)) {
1209 ++successes;
1210 } else {
1211 fprintf(stderr,"Could not find marker named %s in the list of tracked markers\n", marker_names[ix]);
1212 continue;
1213 }
1214 e->centre[markerix] = centres[ix]; // the significant line
1215 }
1216 return successes;
1217}
1218
1257 const gsc_EffectID effset,
1258 const GSC_GENOLEN_T n_markers,
1259 const char** marker_names,
1260 const double* centres,
1261 const char allele,
1262 const _Bool reset_centres) {
1263 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effset);
1264 if (effIndex == GSC_NA_IDX) {
1265 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effset.id);
1266 return 0;
1267 }
1268
1269 gsc_MarkerEffects* e = d->e + effIndex;
1270 if (e->centre == NULL) {
1271 e->centre = gsc_malloc_wrap(sizeof(*e->centre)*e->n_markers, GSC_TRUE);
1272 for (GSC_GENOLEN_T i = 0; i < e->n_markers; ++i) {
1273 e->centre[i] = 0.;
1274 }
1275 } else if (reset_centres) {
1276 for (GSC_GENOLEN_T i = 0; i < e->n_markers; ++i) {
1277 e->centre[i] = 0.;
1278 }
1279 }
1280
1281 GSC_GENOLEN_T successes = 0;
1282 for (GSC_GENOLEN_T ix = 0; ix < n_markers; ++ix) {
1283 GSC_GENOLEN_T markerix;
1284 if (gsc_get_index_of_genetic_marker(marker_names[ix], d->genome, &markerix)) {
1285 // We have the marker. Let's find the effect of this allele
1286 _Bool found = 0;
1287 for (GSC_GENOLEN_T a = ((markerix > 0) ? e->cumn_alleles[markerix-1] : 0);
1288 a < e->cumn_alleles[markerix]; ++a) {
1289 if (e->allele[a] == allele) {
1290 found = 1;
1291
1292 // the significant lines:
1293 double mcentre = centres[ix] * e->eff[a];
1294 e->centre[markerix] = reset_centres ? mcentre : mcentre + e->centre[markerix];
1295 break;
1296 }
1297 }
1298 if (!found) {
1299 fprintf(stderr,"Could not find effect value for allele %c at marker %s\n", allele, marker_names[ix]);
1300 } else {
1301 ++successes;
1302 }
1303 } else {
1304 fprintf(stderr,"Could not find marker named %s in the list of tracked markers\n", marker_names[ix]);
1305 continue;
1306 }
1307 }
1308 return successes;
1309}
1310
/** Count the decimal digits of an integer.
 *
 * The sign is not counted, and 0 is reported as having 0 digits.
 *
 * @param i the integer to measure.
 * @return how many times `i` can be divided by 10 before reaching 0.
 */
int gsc_get_integer_digits(const int i) {
    int count = 0;
    for (int remaining = i; remaining != 0; remaining /= 10) {
        ++count;
    }
    return count;
}
1324
/** qsort-style comparer that orders pointers-to-double so that the doubles
 * they point at are in descending order.
 *
 * @param pp0 address of the first `double*`.
 * @param pp1 address of the second `double*`.
 * @return -1 if the first double is larger, 1 if it is smaller, 0 otherwise.
 */
static int gsc_helper_descending_pdouble_comparer(const void* pp0, const void* pp1) {
    const double first = **(const double* const*)pp0;
    const double second = **(const double* const*)pp1;
    // (smaller) - (larger): larger values sort towards the front
    return (first < second) - (first > second);
}
1341
/** qsort-style comparer that orders doubles in ascending order.
 *
 * @param pp0 address of the first double.
 * @param pp1 address of the second double.
 * @return -1 if the first double is smaller, 1 if it is larger, 0 otherwise.
 */
static int gsc_helper_ascending_double_comparer(const void* pp0, const void* pp1) {
    const double first = *(const double*)pp0;
    const double second = *(const double*)pp1;
    // (larger) - (smaller): smaller values sort towards the front
    return (first > second) - (first < second);
}
1357
/** qsort-style comparer that orders pointers-to-double so that the doubles
 * they point at are in ascending order.
 *
 * @param pp0 address of the first `double*`.
 * @param pp1 address of the second `double*`.
 * @return -1 if the first double is smaller, 1 if it is larger, 0 otherwise.
 */
static int gsc_helper_ascending_pdouble_comparer(const void* pp0, const void* pp1) {
    const double first = **(const double* const*)pp0;
    const double second = **(const double* const*)pp1;
    // (larger) - (smaller): smaller values sort towards the front
    return (first > second) - (first < second);
}
1374
/** qsort-style comparer for arrays whose elements are pointers to strings
 * (i.e. each element is a `char**`); orders them by strcmp of the strings.
 *
 * @param p0 address of the first `char**`.
 * @param p1 address of the second `char**`.
 * @return the strcmp result for the two strings reached through the pointers.
 */
static int gsc_helper_indirect_alphabetical_str_comparer(const void* p0, const void* p1) {
    // Two levels of indirection sit between the array slot and the string.
    char** const* slot0 = p0;
    char** const* slot1 = p1;
    return strcmp(**slot0, **slot1);
}
1383
1387static int gsc_helper_mapfileunit_ascending_chr_comparer(const void* p0, const void* p1) {
1388 struct gsc_MapfileUnit s0 = *(struct gsc_MapfileUnit*)p0;
1389 struct gsc_MapfileUnit s1 = *(struct gsc_MapfileUnit*)p1;
1390 //return (s0.chr < s1.chr) ? -1 : (s0.chr > s1.chr);
1391 return strcmp(s0.chr, s1.chr);
1392}
1393
1397static int gsc_helper_mapfileunit_ascending_d_comparer(const void* p0, const void* p1) {
1398 struct gsc_MapfileUnit s0 = *(struct gsc_MapfileUnit*)p0;
1399 struct gsc_MapfileUnit s1 = *(struct gsc_MapfileUnit*)p1;
1400 return (s0.pos < s1.pos) ? -1 : (s0.pos > s1.pos);
1401}
1402
1406static int gsc_helper_effectfileunit_ascending_mix_comparer(const void* p0, const void* p1) {
1407 struct gsc_EffectfileUnit s0 = *(struct gsc_EffectfileUnit*)p0;
1408 struct gsc_EffectfileUnit s1 = *(struct gsc_EffectfileUnit*)p1;
1409 return (s0.markerix < s1.markerix) ? -1 : (s0.markerix > s1.markerix);
1410}
1411
1428 gsc_GenoLocation to,
1429 int* label_defaults) {
1430 if (to.localAM == from.localAM && to.localPos == from.localPos) {
1431 return;
1432 }
1433 if (to.localAM->groups[to.localPos].num != GSC_NO_GROUP.num) {
1434 fprintf(stderr,"In moving a genotype from %p:%lu to %p:%lu, the genotype at %p:%lu will be overwritten\n",
1435 from.localAM, (long unsigned int)from.localPos, to.localAM, (long unsigned int)to.localPos,
1436 to.localAM, (long unsigned int)to.localPos);
1437 --to.localAM->n_genotypes;
1438 }
1439 to.localAM->alleles[to.localPos] = from.localAM->alleles[from.localPos];
1440 from.localAM->alleles[from.localPos] = NULL;
1441
1442 to.localAM->names[to.localPos] = from.localAM->names[from.localPos];
1443 from.localAM->names[from.localPos] = NULL;
1444
1445 to.localAM->ids[to.localPos] = from.localAM->ids[from.localPos];
1446 from.localAM->ids[from.localPos] = GSC_NO_PEDIGREE;
1447
1448 to.localAM->pedigrees[0][to.localPos] = from.localAM->pedigrees[0][from.localPos];
1449 from.localAM->pedigrees[0][from.localPos] = GSC_NO_PEDIGREE;
1450 to.localAM->pedigrees[1][to.localPos] = from.localAM->pedigrees[1][from.localPos];
1451 from.localAM->pedigrees[1][from.localPos] = GSC_NO_PEDIGREE;
1452
1453 to.localAM->groups[to.localPos] = from.localAM->groups[from.localPos];
1454 from.localAM->groups[from.localPos] = GSC_NO_GROUP;
1455
1456 if (to.localAM->n_labels != from.localAM->n_labels) {
1457 fprintf(stderr,"Origin and destination when copying genotype do not have the same number of custom"
1458 " labels (n_labels). The genotype now at %p:%lu will have lost its label data\n",
1459 to.localAM, (long unsigned int)to.localPos);
1460 } else if (to.localAM->n_labels != 0 && label_defaults == NULL) {
1461 fprintf(stderr,"Label defaults must be supplied to gsc_move_genotypes or there is risk of "
1462 "corrupted label values in further use of the simulation");
1463 } else {
1464 for (GSC_ID_T i = 0; i < to.localAM->n_labels; ++i) {
1465 to.localAM->labels[i][to.localPos] = from.localAM->labels[i][from.localPos];
1466 from.localAM->labels[i][from.localPos] = label_defaults[i];
1467 }
1468 }
1469
1470 if (from.localAM != to.localAM) {
1471 --from.localAM->n_genotypes;
1472 ++to.localAM->n_genotypes;
1473 }
1474}
1475
1482 if (it->cursor.localAM == NULL) {
1484 } else if (it->cursor.localPos >= CONTIG_WIDTH) {
1485 it->cursor.localPos = 0;
1486 it->cursor.localAM = it->cursor.localAM->next;
1487 ++it->cursorAMIndex;
1488 if (it->cursor.localAM == NULL) {
1490 }
1491 }
1492 return it->cursor;
1493}
1494
// Fragment: the start of this function (signature and opening statements) and
// listing lines 1516-1517 are not present in this excerpt.
// Visible behaviour: advance the gappy iterator's cursor until it rests on a slot
// whose group number equals GSC_NO_GROUP.num (i.e. a slot not assigned to any
// group), then return the cursor by value.
1504 }
1505
1506 while (it->cursor.localAM->groups[it->cursor.localPos].num != GSC_NO_GROUP.num) {
1507
1508 // Trusts that n_genotypes is correct.
1509 if (it->cursor.localAM->n_genotypes == CONTIG_WIDTH) { // work-saver: skip this gsc_AlleleMatrix if it is already known to be full.
1510 it->cursor.localAM = it->cursor.localAM->next;
1511 ++it->cursorAMIndex;
1512 } else {
1513 ++it->cursor.localPos;
1514 }
1515
// NOTE(review): the statements that re-validate the cursor after it moves (listing
// lines 1516-1517) are missing here; without them the loop condition above would
// dereference a NULL localAM at the end of the list -- confirm against the full file.
1518 }
1519 }
1520
1521 return it->cursor;
1522}
1523
// Fragment: the start of this function and listing lines 1537-1538 are not present
// in this excerpt.
// Visible behaviour: step the gappy iterator's cursor forward one slot at a time
// while the current slot's group number equals GSC_NO_GROUP.num, then return the
// cursor by value.
1533 }
1534
1535 while (it->cursor.localAM->groups[it->cursor.localPos].num == GSC_NO_GROUP.num) {
1536 ++it->cursor.localPos;
// NOTE(review): the bounds/validity handling after the increment (listing lines
// 1537-1538) is missing from this excerpt -- confirm against the full file.
1539 }
1540 }
1541
1542 return it->cursor;
1543}
1544
1545
// Fragment of the routine that compacts genotype storage in d->m (signature line and
// listing lines 1602, 1609 and 1617-1618 are not present in this excerpt).
// Two cursors walk the gsc_AlleleMatrix list: `filler` rests on gaps, `checker` rests
// on occupied slots further along. Each occupied slot is moved back into the earliest
// gap with gsc_move_genotype; afterwards the remaining gap slots are cleared.
1571 // Find the first gap
1572 struct gsc_GappyIterator filler = {.cursor=(gsc_GenoLocation){.localAM=d->m, .localPos=0},
1573 .cursorAMIndex=0};
1574 gsc_nextgappy_get_gap(&filler);
1575
1576 if (!GSC_IS_VALID_LOCATION(filler.cursor)) {
1577 return; // no gaps found
1578 }
1579
// checker starts one slot past the first gap and looks for the next occupied slot.
1580 struct gsc_GappyIterator checker = filler; // copy filler
1581 ++checker.cursor.localPos;
1582 gsc_nextgappy_get_nongap(&checker);
1583
1584 // Shuffle all candidates back
1585 while (GSC_IS_VALID_LOCATION(filler.cursor) && GSC_IS_VALID_LOCATION(checker.cursor)) {
1586 gsc_move_genotype(checker.cursor, filler.cursor, d->label_defaults);
1587
1588 ++filler.cursor.localPos;
1589 gsc_nextgappy_get_gap(&filler);
1590
1591 ++checker.cursor.localPos;
1592 gsc_nextgappy_get_nongap(&checker);
1593 }
1594
1595 // Then, free any other pre-allocated space
1596 while (GSC_IS_VALID_LOCATION(filler.cursor)) {
1597 if (filler.cursor.localAM->n_genotypes == 0) {
1598 // no genotypes after this point
// NOTE(review): when filler.cursorAMIndex is 0 the argument below is index - 1;
// if that index type is unsigned it wraps to a huge value, and the lookup then
// walks off the list and yields NULL (handled by the check that follows) -- confirm.
1599 AlleleMatrix* previous = gsc_get_nth_AlleleMatrix(d->m, filler.cursorAMIndex - 1);
1600 if (previous != NULL) {
1601 previous->next = NULL;
// Listing line 1602 is missing here (presumably releases the detached tail -- TODO confirm).
1603 }
// Setting localAM to NULL makes the cursor invalid and ends the loop.
1604 filler.cursor.localAM = NULL;
1605
1606 } else {
1607 // If this gap has allocated space, clear it.
1608 if (gsc_get_alleles(filler.cursor) != NULL) {
// Listing line 1609 is missing here (presumably frees the allele buffer -- TODO confirm).
1610 filler.cursor.localAM->alleles[filler.cursor.localPos] = NULL;
1611 }
1612 if (gsc_get_name(filler.cursor) != NULL) {
1613 GSC_FREE(gsc_get_name(filler.cursor));
1614 filler.cursor.localAM->names[filler.cursor.localPos] = NULL;
1615 }
1616 filler.cursor.localAM->ids[filler.cursor.localPos] = GSC_NO_PEDIGREE;
1619 filler.cursor.localAM->groups[filler.cursor.localPos] = GSC_NO_GROUP;
1620
1621 ++filler.cursor.localPos;
1622 gsc_nextgappy_get_gap(&filler);
1623 }
1624 }
1625}
1626
1627
1628
1629/*----------------------------------Locators---------------------------------*/
1630
1631
1655 const gsc_GroupNum group) {
1656 return gsc_create_bidirectional_iter_fromAM(d->m, group);
1657}
1658
1660 const gsc_GroupNum group) {
1661 return (gsc_BidirectionalIterator) {
1662 .am = am,
1663 .group = group,
1664 .localPos = GSC_NA_LOCALX,
1665
1666 .cachedAM = am,
1667 .cachedAMIndex = 0,
1668
1669 .atStart = 0,
1670 .atEnd = 0
1671 };
1672}
1673
1702 GSC_LOCALX_T first = 0;
1703 gsc_AlleleMatrix* firstAM = d->m;
1704 _Bool anyExist = 1;
1705
1706 // Want to know:
1707 // - is this group empty? (randomAccess should know if group size is 0)
1708 // - what is the first genotype index in this group?
1709
1710 if (firstAM == NULL) {
1711 return (gsc_RandomAccessIterator) {
1712 .d = d,
1713 .group = group,
1714
1715 .largestCached = 0,
1716 .groupSize = 0, // NA represents unknown, 0 represents empty
1717 .cacheSize = 0,
1718 .cache = NULL
1719 };
1720
1721 } else if (group.num == GSC_NO_GROUP.num) { // scanning all genotypes
1722 while (firstAM->n_genotypes == 0) {
1723 if (firstAM->next == NULL) {
1724 // gsc_SimData is empty. Nowhere to go.
1725 anyExist = 0;
1726 } else { // Keep moving forwards through the list. Not polite enough to clean up the blank AM.
1727 firstAM = firstAM->next;
1728 }
1729 }
1730
1731 } else { // scanning a specific group
1732 _Bool exitNow = 0;
1733 while (!exitNow) {
1734
1735 // Set first, firstAM, firstAMIndex if appropriate
1736 for (GSC_LOCALX_T i = 0; i < firstAM->n_genotypes; ++i) {
1737 if (firstAM->groups[i].num == group.num) {
1738 first = i;
1739 exitNow = 1;
1740 break;
1741 }
1742 }
1743
1744 // Move along and set anyExist if appropriate
1745 if (!exitNow) {
1746 firstAM = firstAM->next;
1747 if (firstAM == NULL) {
1748 anyExist = 0;
1749 exitNow = 1;
1750 }
1751 }
1752 }
1753 }
1754
1755 gsc_GenoLocation* cache = NULL;
1756 GSC_GLOBALX_T cacheSize = 0;
1757 if (anyExist) {
1758 cacheSize = 50;
1759 cache = gsc_malloc_wrap((sizeof(gsc_GenoLocation)*cacheSize),GSC_TRUE);
1760 cache[0] = (gsc_GenoLocation) {
1761 .localAM= firstAM,
1762 .localPos = first,
1763 };
1764 for (GSC_GLOBALX_T i = 1; i < cacheSize; ++i) {
1765 cache[i] = GSC_INVALID_GENO_LOCATION;
1766 }
1767
1768 }
1769
1770 return (gsc_RandomAccessIterator) {
1771 .d = d,
1772 .group = group,
1773
1774 .largestCached = anyExist ? 0 : GSC_NA_GLOBALX,
1775 .groupSize = anyExist ? GSC_NA_GLOBALX : 0, // NA represents unknown, 0 represents empty
1776 .cacheSize = cacheSize,
1777 .cache = cache
1778 };
1779}
1780
1791 unsigned int currentIndex = 0;
1792 gsc_AlleleMatrix* am = listStart;
1793 if (am == NULL) return NULL;
1794 while (currentIndex < n) {
1795 if (am->next == NULL) {
1796 return NULL;
1797 } else {
1798 am = am->next;
1799 currentIndex++;
1800 }
1801 }
1802 return am;
1803}
1804
// Fragment: body of the routine that positions a bidirectional iterator `it` on the
// first member of it->group (signature line and listing line 1825 are not present
// in this excerpt). It updates it->localPos, it->atStart, it->atEnd, it->cachedAM
// and it->cachedAMIndex, and returns the located {matrix, position} pair.
1816 GSC_LOCALX_T first = 0;
1817 gsc_AlleleMatrix* firstAM = it->am;
1818 unsigned int firstAMIndex = 0;
1819 _Bool anyExist = 1;
1820
1821 // Want to know:
1822 // - is this group empty? (iterator should know if it is at the end as well as at the start)
1823 // - what is the first genotype index in this group?
1824 if (firstAM == NULL) {
// Listing line 1825 is missing here (presumably an early return -- TODO confirm).
1826
1827 } else if (it->group.num == GSC_NO_GROUP.num) {
// NOTE(review): if the last matrix in the list has n_genotypes == 0, the branch
// below sets anyExist = 0 but neither changes firstAM nor breaks, so this while
// loop never terminates. A `break` after `anyExist = 0` looks necessary.
1828 while (firstAM->n_genotypes == 0) {
1829 if (firstAM->next == NULL) {
1830 anyExist = 0; // gsc_SimData is empty.
1831
1832 } else { // (Not polite enough to clean up the blank AM.)
1833 firstAM = firstAM->next;
1834 firstAMIndex++;
1835 // first += 0;
1836 }
1837 }
1838
1839 // After this runs we have set firstAM, first, firstAMIndex, anyExist appropriately
1840
1841 } else { // scanning a specific group
1842
1843 _Bool exitNow = 0;
1844 while (!exitNow) {
1845
1846 // Set first, firstAM, firstAMIndex if appropriate
1847 for (GSC_LOCALX_T i = 0; i < firstAM->n_genotypes; ++i) {
1848 if (firstAM->groups[i].num == it->group.num) {
1849 first = i;
1850 exitNow = 1;
1851 break;
1852 }
1853 }
1854
1855 // Move along and set anyExist if appropriate
1856 if (!exitNow) {
1857 firstAM = firstAM->next;
1858 firstAMIndex++;
// Reaching the end of the list means the group has no members.
1859 if (firstAM == NULL) {
1860 first = GSC_NA_LOCALX;
1861 anyExist = 0;
1862 exitNow = 1;
1863 }
1864 }
1865 }
1866 }
1867
1868 it->localPos = first;
1869 if (anyExist) {
1870 it->atStart = 1;
1871 it->atEnd = 0;
1872 } else { // fail immediately on all further accesses. The group is empty.
1873 it->atStart = 1;
1874 it->atEnd = 1;
1875 }
1876 it->cachedAM = firstAM;
1877 it->cachedAMIndex = firstAMIndex;
1878
1879 return (gsc_GenoLocation) {
1880 .localAM = firstAM,
1881 .localPos = first
1882 };
1883}
1884
// Fragment: body of the routine that positions a bidirectional iterator `it` on the
// last member of it->group (signature line and listing line 1906 are not present in
// this excerpt). It updates it->localPos, it->atStart, it->atEnd, it->cachedAM and
// it->cachedAMIndex, and returns the located {matrix, position} pair.
1896 GSC_LOCALX_T last = 0;
1897 gsc_AlleleMatrix* lastAM = it->am;
1898 unsigned int lastAMIndex = 0;
1899 _Bool anyExist = 1;
1900
1901 // Want to know:
1902 // - is this group empty? (iterator should know if it is at the end as well as at the start)
1903 // - what is the first genotype index in this group?
1904
1905 if (lastAM == NULL) {
// Listing line 1906 is missing here (presumably an early return -- TODO confirm).
1907
// NOTE(review): unlike the start-positioning twin of this routine, the next line is
// `} if`, not `} else if`. That is only safe if the missing line above returns;
// otherwise a NULL lastAM is dereferenced below.
1908 } if (it->group.num == GSC_NO_GROUP.num) {
// Advance to the last matrix that still holds genotypes.
1909 while (lastAM->next != NULL && lastAM->next->n_genotypes != 0) {
1910 lastAM = lastAM->next;
1911 lastAMIndex++;
1912 }
1913 if (lastAMIndex > 0 || lastAM->n_genotypes > 0) {
1914 last = lastAM->n_genotypes - 1;
1915 } else {
1916 anyExist = 0;
1917 }
1918
1919 } else { // scanning a specific group
1920
1921 // Find last AM
1922 while (lastAM->next != NULL && lastAM->next->n_genotypes != 0) {
1923 lastAM = lastAM->next;
1924 lastAMIndex++;
1925 }
1926
1927 _Bool exitNow = 0;
1928 while (!exitNow) {
1929
1930 // Set first, firstAM, firstAMIndex if appropriate
// NOTE(review): if GSC_LOCALX_T is an unsigned type, `i >= 0` is always true and
// `n_genotypes - 1` wraps when n_genotypes is 0 -- confirm the typedef.
1931 for (GSC_LOCALX_T i = lastAM->n_genotypes - 1; i >= 0; --i) {
1932 if (lastAM->groups[i].num == it->group.num) {
1933 last = i;
1934 exitNow = 1;
1935 break;
1936 }
1937 }
1938
1939 // Move along and set anyExist if appropriate
1940 if (!exitNow) {
// NOTE(review): lastAMIndex is unsigned; decrementing it at 0 wraps, the lookup
// below then returns NULL (list shorter than the index), and the following
// `lastAM->n_genotypes` dereferences that NULL. A group with no members in the
// first matrix appears to reach this path.
1941 --lastAMIndex;
1942 lastAM = gsc_get_nth_AlleleMatrix(it->am, lastAMIndex);
1943 if (lastAM->n_genotypes == 0) {
1944 last = GSC_NA_LOCALX;
1945 anyExist = 0;
1946 exitNow = 1;
1947 }
1948 }
1949 }
1950 }
1951
1952 it->localPos = last;
1953 if (anyExist) {
1954 it->atStart = 0;
1955 it->atEnd = 1;
1956 } else { // group is empty: fail immediately on any further accesses
1957 it->atStart = 1;
1958 it->atEnd = 1;
1959 }
1960 it->cachedAM = lastAM;
1961 it->cachedAMIndex = lastAMIndex;
1962
1963 return (gsc_GenoLocation) {
1964 .localAM = lastAM,
1965 .localPos = last
1966 };
1967}
1968
1969
// Fragment: body of the forward-step routine of a bidirectional iterator `it`
// (signature line and listing lines 1991, 1995, 2024 and 2065 are not present in
// this excerpt, so several branches appear empty; presumably they return -- TODO
// confirm). Visible behaviour: move to the next slot (any genotype when it->group is
// GSC_NO_GROUP, otherwise the next slot whose group number matches), updating
// it->localPos, it->cachedAM, it->cachedAMIndex and the atStart/atEnd flags.
// An unpositioned iterator (localPos is NA) is handled first.
1990 if (it->localPos == GSC_NA_LOCALX) {
1992 }
1993
1994 if (it->atEnd) { // || validate_bidirectional_cache(it) == GSC_FALSE) { // can't use this because what if our iterator user is modifying group allocations?
1996 }
1997
1998 if (it->group.num == GSC_NO_GROUP.num) {
1999
2000 // Search for the next value.
2001 if (it->localPos + 1 < it->cachedAM->n_genotypes) {
2002 // The next value is in the same gsc_AlleleMatrix
2003 it->localPos++;
2004 it->atStart = 0;
2005 return (gsc_GenoLocation) {
2006 .localAM = it->cachedAM,
2007 .localPos = it->localPos
2008 };
2009
2010 } else {
2011 // The next value is in the next gsc_AlleleMatrix
2012 gsc_AlleleMatrix* nextAM = it->cachedAM;
2013 int nextAMIndex = it->cachedAMIndex;
// Skip over any matrices that hold no genotypes.
2014 if (nextAM != NULL) {
2015 do {
2016 nextAM = nextAM->next;
2017 nextAMIndex++;
2018 } while (nextAM != NULL && nextAM->n_genotypes == 0);
2019 }
2020
2021 if (nextAM == NULL) {
2022 // There is no further gsc_AlleleMatrix; we are at the end of the iterator.
2023 it->atEnd = 1;
2025 } else {
2026 it->cachedAM = nextAM;
2027 it->cachedAMIndex = nextAMIndex;
2028 it->localPos = 0;
2029 it->atStart = 0;
2030 return (gsc_GenoLocation) {
2031 .localAM = it->cachedAM,
2032 .localPos = 0
2033 };
2034 }
2035 }
2036
2037 } else { // We are iterating through a specific group
2038
2039 // Search for the next value
2040 while(1) {
// Scan the rest of the current matrix for a slot in the wanted group.
2041 if (it->localPos + 1 < it->cachedAM->n_genotypes) {
2042 for (++it->localPos; it->localPos < it->cachedAM->n_genotypes; ++it->localPos) {
2043 if (it->cachedAM->groups[it->localPos].num == it->group.num) {
2044 it->atStart = 0;
2045 return (gsc_GenoLocation) {
2046 .localAM = it->cachedAM,
2047 .localPos = it->localPos
2048 };
2049 }
2050 }
2051 }
2052
2053 gsc_AlleleMatrix* nextAM = it->cachedAM;
2054 int nextAMIndex = it->cachedAMIndex;
2055 if (nextAM != NULL) {
2056 do {
2057 nextAM = nextAM->next;
2058 nextAMIndex++;
2059 } while (nextAM != NULL && nextAM->n_genotypes == 0);
2060 }
2061
2062 if (nextAM == NULL) {
2063 // There is no further gsc_AlleleMatrix; we are at the end of the iterator.
2064 it->atEnd = 1;
// NOTE(review): listing line 2065 is missing here; this branch must leave the
// enclosing while(1) (presumably by returning) or the loop never ends -- confirm.
2066 } else {
2067 it->cachedAM = nextAM;
2068 it->cachedAMIndex = nextAMIndex;
2069 it->localPos = 0;
// Slot 0 of the new matrix is tested here because the scan at the top of the
// loop starts from localPos + 1.
2070 if (it->cachedAM->groups[it->localPos].num == it->group.num) {
2071 it->atStart = 0;
2072 return (gsc_GenoLocation) {
2073 .localAM = it->cachedAM,
2074 .localPos = it->localPos
2075 };
2076 }
2077 }
2078 }
2079
2080 }
2081}
2082
2083
// Fragment: body of the backward-step routine of a bidirectional iterator `it`
// (signature line and listing lines 2109, 2113, 2132, 2145, 2178 and 2191 are not
// present in this excerpt, so several branches appear empty; presumably they
// return -- TODO confirm). Visible behaviour: move to the previous slot (any genotype
// when it->group is GSC_NO_GROUP, otherwise the previous slot whose group number
// matches), updating it->localPos, it->cachedAM, it->cachedAMIndex and the
// atStart/atEnd flags. Previous matrices are found by re-walking the list from
// it->am with gsc_get_nth_AlleleMatrix, since only `next` links are followed here.
2108 if (it->localPos == GSC_NA_LOCALX) {
2110 }
2111
2112 if (it->atStart) { //|| validate_bidirectional_cache(it) == GSC_FALSE) {
2114 }
2115
2116 if (it->group.num == GSC_NO_GROUP.num) {
2117
2118 // Search for the previous value.
2119 if (it->localPos > 0) {
2120 // The previous value is in the same gsc_AlleleMatrix
2121 it->localPos--;
2122 it->atEnd = 0;
2123 return (gsc_GenoLocation) {
2124 .localAM = it->cachedAM,
2125 .localPos = it->localPos
2126 };
2127
2128 } else {
2129 // The previous value is in the previous gsc_AlleleMatrix
2130 if (it->cachedAMIndex == 0) {
2131 it->atStart = 1;
2133 } else {
2134 gsc_AlleleMatrix* nextAM = it->cachedAM;
2135 int nextAMIndex = it->cachedAMIndex;
// Step back over any matrices that hold no genotypes.
2136 if (nextAM != NULL) {
2137 do {
2138 nextAMIndex--;
2139 nextAM = gsc_get_nth_AlleleMatrix(it->am, nextAMIndex);
2140 } while (nextAM != NULL && nextAM->n_genotypes == 0);
2141 }
2142
2143 if (nextAM == NULL) {
2144 it->atStart = 1;
2146 } else {
2147 it->cachedAM = nextAM;
2148 it->cachedAMIndex = nextAMIndex;
2149 it->localPos = it->cachedAM->n_genotypes - 1;
2150 it->atEnd = 0;
2151 return (gsc_GenoLocation) {
2152 .localAM = it->cachedAM,
2153 .localPos = it->localPos
2154 };
2155 }
2156 }
2157 }
2158
2159 } else { // We are iterating through a specific group
2160
2161 // Search for the next value
2162 while(1) {
2163 if (it->localPos > 0) {
// NOTE(review): if GSC_LOCALX_T is an unsigned type, `it->localPos >= 0` is always
// true and decrementing past 0 wraps, so this scan would index groups[] out of
// range when no earlier slot matches -- confirm the typedef.
2164 for (--it->localPos; it->localPos >= 0; --it->localPos) {
2165 if (it->cachedAM->groups[it->localPos].num == it->group.num) {
2166 it->atEnd = 0;
2167 return (gsc_GenoLocation) {
2168 .localAM = it->cachedAM,
2169 .localPos = it->localPos
2170 };
2171 }
2172 }
2173 }
2174
2175 if (it->cachedAMIndex == 0) {
2176 it->atStart = 1;
2177 it->localPos = 0;
// NOTE(review): listing line 2178 is missing here; this branch must leave the
// enclosing while(1) (presumably by returning) -- confirm.
2179 } else {
2180 gsc_AlleleMatrix* nextAM = it->cachedAM;
2181 int nextAMIndex = it->cachedAMIndex;
2182 if (nextAM != NULL) {
2183 do {
2184 nextAMIndex--;
2185 nextAM = gsc_get_nth_AlleleMatrix(it->am, nextAMIndex);
2186 } while (nextAM != NULL && nextAM->n_genotypes == 0);
2187 }
2188
2189 if (nextAM == NULL) {
2190 it->atStart = 1;
2192 } else {
2193 it->cachedAM = nextAM;
2194 it->cachedAMIndex = nextAMIndex;
2195 it->localPos = it->cachedAM->n_genotypes - 1;
// The last slot of the new matrix is tested here because the scan at the top of
// the loop starts from localPos - 1.
2196 if (it->cachedAM->groups[it->localPos].num == it->group.num) {
2197 it->atEnd = 0;
2198 return (gsc_GenoLocation) {
2199 .localAM = it->cachedAM,
2200 .localPos = it->localPos
2201 };
2202 }
2203 }
2204 }
2205 }
2206 }
2207}
2208
2209
// Fragment: body of the lookup routine of a random-access iterator `it`, which finds
// the n-th (0-based) member of it->group (signature line and listing lines 2244,
// 2259, 2265, 2277 and 2325 are not present in this excerpt, so several failure
// branches appear empty; presumably they return an invalid location -- TODO confirm).
// Group members found while scanning are memoised in it->cache, which is doubled in
// size when full; it->groupSize is filled in once the end of the group is reached.
2233 // Validity checks for a random access iterator: largestCached must exist,
2234 // is indeed cached and belongs to the same group
2235 /*if (it->largestCached == GSC_NA_GLOBALX ||
2236 (!GSC_IS_VALID_LOCATION(it->cache[it->largestCached]) &&
2237 (it->group.num == GSC_NO_GROUP.num ||
2238 it->group.num != gsc_get_group(it->cache[it->largestCached]).num))) {
2239 return GSC_INVALID_GENO_LOCATION;
2240 }*/
2241
2242 // Step 0: Fail immediately if we know there aren't this many candidates in the group.
2243 if (it->groupSize != GSC_NA_GLOBALX && it->groupSize <= n) {
2245 }
2246
2247 // Step 1: Check if we have it in the cache.
2248 if (n < it->cacheSize) {
2249 // 'n' is less than or equal to our current furthest cached group member.
2250
2251 if (GSC_IS_VALID_LOCATION(it->cache[n])) { return it->cache[n]; }
2252 // Otherwise we do not have it cached, but we will enter it into the cache in the next section
2253 }
2254
2255 // Step 2: The effort of actually finding the nth group member.
2256 if (it->group.num == GSC_NO_GROUP.num) {
2257 // Assuming all non-end gsc_AlleleMatrix are filled to CONTIG_WIDTH
// Listing line 2259 (the .localAM initialiser) is missing from this excerpt.
// Note that results on this path are returned without being written to the cache.
2258 gsc_GenoLocation expectedLocation = {
2260 .localPos = n % CONTIG_WIDTH
2261 };
2262 // Check n was not too large
2263 if (expectedLocation.localAM == NULL ||
2264 expectedLocation.localAM->n_genotypes <= expectedLocation.localPos) {
2266 }
2267 return expectedLocation;
2268
2269 } else { // searching for a particular group
2270
2271 gsc_AlleleMatrix* currentAM;
2272 GSC_GLOBALX_T groupN;
2273 GSC_LOCALX_T localPos;
2274
2275 if (!GSC_IS_VALID_LOCATION(it->cache[it->largestCached])) {
2276 // Cache is invalid. You should throw out the iterator and replace with a new one.
2278 }
2279
2280 // Search forwards from largestCached
2281 currentAM = it->cache[it->largestCached].localAM;
2282 groupN = it->largestCached;
2283 localPos = it->cache[it->largestCached].localPos + 1;
2284
2285 while (1) {
2286 for (; localPos < currentAM->n_genotypes; ++localPos) {
2287 // If we found a group member, cache it and count upwards towards n
2288 if (currentAM->groups[localPos].num == it->group.num) {
2289 it->largestCached = ++groupN;
2290
2291 // Do we need to expand the cache to hold this?
2292 if (it->largestCached >= it->cacheSize) {
2293 GSC_GLOBALX_T newCacheSize = it->cacheSize;
2294 if (it->cacheSize == 0) {
2295 newCacheSize = 25;
2296 } else {
2297 newCacheSize = newCacheSize << 1;
2298 }
// Grow by copy: allocate the larger cache, copy the old entries, mark the new
// tail invalid, then release the old buffer.
2299 gsc_GenoLocation* newCache = gsc_malloc_wrap(sizeof(gsc_GenoLocation)*newCacheSize,GSC_TRUE);
2300 // initialise
2301 memcpy(newCache, it->cache, sizeof(*newCache)*it->cacheSize);
2302 for (GSC_GLOBALX_T i = it->cacheSize; i < newCacheSize; ++i) {
2303 newCache[i] = GSC_INVALID_GENO_LOCATION;
2304 }
2305 // clean
2306 GSC_FREE(it->cache);
2307 it->cache = newCache;
2308 it->cacheSize = newCacheSize;
2309 }
2310
2311 // Store this additional group member.
2312 it->cache[groupN] = (gsc_GenoLocation) {
2313 .localAM = currentAM,
2314 .localPos = localPos
2315 };
2316 if (groupN == n) {
2317 return it->cache[n];
2318 }
2319 }
2320 }
2321
2322 if (currentAM->next == NULL || currentAM->next->n_genotypes == 0) {
2323 // We are at the end of the iterator and have not found n
// groupN is the index of the last member found, so the member count is groupN + 1.
2324 it->groupSize = groupN + 1;
// NOTE(review): listing line 2325 is missing here; this branch must leave the
// enclosing while(1) (presumably by returning) -- confirm.
2326 } else {
2327 currentAM = currentAM->next;
2328 localPos = 0;
2329 }
2330
2331 }
2332 }
2333
2334}
2335
// Fragment: body of the routine that looks up the stored name of the genotype whose
// pedigree ID is `id`, searching the gsc_AlleleMatrix list from `start` (signature
// line and listing line 2373, which computes `index`, are not present in this
// excerpt). Returns the name pointer held in the matrix (not a copy), or NULL with a
// message on stderr when the ID is GSC_NO_PEDIGREE or cannot be found. A NULL
// `start` terminates the process with exit(1).
2357 if (id.id == GSC_NO_PEDIGREE.id) {
2358 fprintf(stderr, "Invalid ID %lu\n", (long unsigned int)id.id);
2359 return NULL;
2360 }
2361 if (start == NULL) {
2362 fprintf(stderr, "Invalid nonexistent allelematrix\n"); exit(1);
2363 }
2364 const gsc_AlleleMatrix* m = start;
2365
2366 while (1) {
2367 // try to find our id. Does this AM potentially have the right range for it?
2368 // If we're not sure, because either of the endpoints does not have its ID tracked,
2369 // check anyway
2370 if (m->n_genotypes != 0 && (id.id >= m->ids[0].id || m->ids[0].id == GSC_NO_PEDIGREE.id) &&
2371 (id.id <= m->ids[m->n_genotypes - 1].id || m->ids[m->n_genotypes - 1].id == GSC_NO_PEDIGREE.id)) {
2372
2374
// NOTE(review): `index > n_genotypes` lets index == n_genotypes through to
// m->names[index] below; the sibling parent-lookup routine tests its search result
// against GSC_NA_LOCALX instead. Which sentinel the search on the missing line
// returns cannot be seen here -- confirm.
2375 if (index > m->n_genotypes) {
2376 // search failed
2377 if (m->next == NULL) {
2378 fprintf(stderr, "Could not find the ID %lu: did you prematurely delete this genotype?\n", (long unsigned int)id.id);
2379 return NULL;
2380 } else {
2381 m = m->next;
2382 continue;
2383 }
2384 }
2385
2386 return m->names[index];
2387
2388 }
2389
2390 if (m->next == NULL) {
2391 fprintf(stderr, "Could not find the ID %lu: did you prematurely delete this genotype?\n", (long unsigned int)id.id);
2392 return NULL;
2393 } else {
2394 m = m->next;
2395 }
2396 }
2397}
2398
// Fragment: tail of the parameter list and body of the routine that looks up the two
// recorded parents of the genotype with pedigree ID `id` (the first signature line
// and listing line 2436, which computes `index`, are not present in this excerpt).
// Return codes visible below: 0 = at least one parent known and both written to
// output[0..1]; 1 = `id` is GSC_NO_PEDIGREE or neither parent is recorded;
// 2 = the ID was not found in any matrix. A NULL `start` calls exit(1).
2423 const gsc_PedigreeID id,
2424 gsc_PedigreeID output[static 2]) {
2425 if (id.id == GSC_NO_PEDIGREE.id) {
2426 return 1;
2427 }
2428 if (start == NULL) {
2429 fprintf(stderr, "Invalid nonexistent allelematrix\n"); exit(1);
2430 }
2431 const gsc_AlleleMatrix* m = start;
2432 while (1) {
2433 // try to find our id. Does this AM have the right range for it?
2434 if (m->n_genotypes != 0 && id.id >= m->ids[0].id && id.id <= m->ids[m->n_genotypes - 1].id) {
2435 // perform binary search to find the exact index.
2437
2438 if (index == GSC_NA_LOCALX) {
2439 // search failed
2440 /*if (m->next == NULL) {
2441 fprintf(stderr, "Unable to locate ID %d in simulation memory (genotype has likely been deleted): pedigree past this point cannot be determined\n", id.id);
2442 return 2;
2443 } else {
2444 m = m->next;
2445 }*/
// NOTE(review): with the block above commented out, `m` is not advanced before this
// `continue`, so the same matrix is searched again with the same result and the
// loop never terminates when an in-range ID is absent from the matrix.
2446 continue;
2447 } else {
2448
2449 if (m->pedigrees[0][index].id != GSC_NO_PEDIGREE.id || m->pedigrees[1][index].id != GSC_NO_PEDIGREE.id) {
2450 output[0] = m->pedigrees[0][index];
2451 output[1] = m->pedigrees[1][index];
2452 return 0;
2453 }
2454 return 1; // if neither parent's id is known
2455 }
2456
2457 }
2458
2459 if (m->next == NULL) {
2460 fprintf(stderr, "Unable to locate ID %lu in simulation memory (genotype has likely been deleted): pedigree past this point cannot be determined\n", (long unsigned int)id.id);
2461 return 2;
2462 } else {
2463 m = m->next;
2464 }
2465 }
2466}
2467
2484 const size_t n_names,
2485 const char** names,
2486 gsc_PedigreeID* output) {
2487 if (start == NULL || (start->n_genotypes <= 0 && start->next == NULL)) {
2488 fprintf(stderr,"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2489 return;
2490 }
2491 if (n_names < 1) {
2492 fprintf(stderr,"Invalid n_names parameter: Search list length must be positive\n");
2493 return;
2494 }
2495
2496 _Bool found;
2497 const gsc_AlleleMatrix* m;
2498
2499 for (size_t i = 0; i < n_names; ++i) {
2500 found = 0;
2501 output[i] = GSC_NO_PEDIGREE;
2502 m = start;
2503 while (1) {
2504 // try to identify the name in this AM
2505 for (GSC_LOCALX_T j = 0; j < m->n_genotypes; ++j) {
2506 if (strcmp(m->names[j], names[i]) == 0) {
2507 found = 1;
2508 output[i] = m->ids[j];
2509 break;
2510 }
2511 }
2512
2513 if (found) {
2514 break;
2515 }
2516 if ((m = m->next) == NULL) {
2517 fprintf(stderr, "Didn't find the name %s\n", names[i]);
2518 }
2519 }
2520 }
2521}
2522
2538 const gsc_PedigreeID parent1id,
2539 const gsc_PedigreeID parent2id) {
2540 if (start == NULL || (start->n_genotypes <= 0 && start->next == NULL)) {
2541 fprintf(stderr,"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2542 return GSC_NA_GLOBALX;
2543 }
2544 const gsc_AlleleMatrix* m = start;
2545 GSC_GLOBALX_T total_j = 0;
2546
2547 while (1) {
2548 // try to identify the child in this AM
2549 for (GSC_LOCALX_T j = 0; j < m->n_genotypes; ++j, ++total_j) {
2550 if ((parent1id.id == m->pedigrees[0][j].id && parent2id.id == m->pedigrees[1][j].id) ||
2551 (parent1id.id == m->pedigrees[1][j].id && parent2id.id == m->pedigrees[0][j].id)) {
2552 return total_j;
2553 }
2554 }
2555
2556 if ((m = m->next) == NULL) {
2557 fprintf(stderr, "Didn't find the child of %lu & %lu\n",
2558 (long unsigned int)parent1id.id, (long unsigned int)parent2id.id);
2559 return GSC_NA_GLOBALX;
2560 }
2561 }
2562}
2563
2578 if (name == NULL) {
2579 return GSC_NA_GLOBALX;
2580 }
2581 if (start == NULL || (start->n_genotypes <= 0 && start->next == NULL)) {
2582 fprintf(stderr,"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2583 return GSC_NA_GLOBALX;
2584 }
2585 const gsc_AlleleMatrix* m = start;
2586 GSC_GLOBALX_T total_j = 0;
2587
2588 while (1) {
2589 // try to identify the child in this AM
2590 for (GSC_LOCALX_T j = 0; j < m->n_genotypes; ++j, ++total_j) {
2591 if (m->names[j] != NULL && strcmp(m->names[j], name) == 0) {
2592 return total_j;
2593 }
2594 }
2595
2596 if ((m = m->next) == NULL) {
2597 fprintf(stderr, "Didn't find the name %s\n", name);
2598 return GSC_NA_GLOBALX;
2599 }
2600 }
2601}
2602
2615 const GSC_GLOBALX_T index) {
2616 if (start == NULL) {
2617 fprintf(stderr,"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2618 return GSC_NO_PEDIGREE;
2619 }
2620 const gsc_AlleleMatrix* m = start;
2621 GSC_GLOBALX_T total_j = 0;
2622
2623 while (1) {
2624 if (total_j == index) {
2625 return m->ids[0];
2626 } else if (total_j < index && total_j + m->n_genotypes > index) {
2627 return m->ids[index - total_j];
2628 }
2629 total_j += m->n_genotypes;
2630
2631 if ((m = m->next) == NULL) {
2632 fprintf(stderr, "Didn't find the index %lu\n", (long unsigned int) index);
2633 return GSC_NO_PEDIGREE;
2634 }
2635 }
2636}
2637
2654 const GSC_GLOBALX_T index) {
2655 if (start == NULL) {
2656 fprintf(stderr, "Invalid nonexistent allelematrix\n");
2657 return NULL;
2658 }
2659 const gsc_AlleleMatrix* m = start;
2660 GSC_GLOBALX_T total_j = 0;
2661
2662 while (1) {
2663 if (total_j == index) {
2664 return m->alleles[0];
2665 } else if (total_j < index && total_j + m->n_genotypes > index) {
2666 return m->alleles[index - total_j];
2667 }
2668 total_j += m->n_genotypes;
2669
2670 if ((m = m->next) == NULL) {
2671 fprintf(stderr, "Didn't find the index %lu\n", (long unsigned int) index);
2672 return NULL;
2673 }
2674 }
2675}
2676
2677
2678
2679/*-----------------------------------Groups----------------------------------*/
2680
// Fragment: tail of the parameter list and body of the routine that merges the
// groups listed in `grouplist` into one (the first signature line and listing lines
// 2704, 2706, 2720-2721, 2732, 2749, 2751 and 2777 -- which create, advance and
// presumably delete the iterators `testit` and `it` -- are not present in this
// excerpt). The surviving group number is the first listed group that has at least
// one member; every member of the later listed groups is re-labelled to it and
// d->n_groups is reduced by the number of distinct groups that were absorbed.
// Returns the surviving group number, or GSC_NO_GROUP if no listed group has members.
2695 const size_t list_len,
2696 const gsc_GroupNum* grouplist) {
2697
2698 // Find the first group in the list that exists. In most use cases this will be the
2699 // first group in the list, so not too much of a performance penalty.
2700 gsc_GroupNum outGroup = GSC_NO_GROUP;
2701 size_t i = 0;
2702 for (; i < list_len; ++i) {
2703 gsc_GroupNum candidate = grouplist[i];
2705 gsc_GenoLocation testloc = gsc_next_forwards(&testit);
2707 if (GSC_IS_VALID_LOCATION(testloc)) {
2708 outGroup = candidate;
2709 break;
2710 }
2711 }
2712
// NOTE(review): size_t difference stored in an int; very long lists would truncate,
// and the `j < remaininglistlen` comparison further down mixes signed and unsigned.
2713 int remaininglistlen = list_len - i;
2714 if (remaininglistlen < 2) {
2715 return outGroup;
2716 } else if (remaininglistlen == 2) {
// Exactly one other group to absorb; nothing to do if it is the same group.
2717 if (grouplist[i].num == grouplist[i+1].num) {
2718 return outGroup;
2719 }
2722 int anyFound = GSC_IS_VALID_LOCATION(loc);
2723
2724 while (GSC_IS_VALID_LOCATION(loc)) {
2725 gsc_set_group(loc,outGroup);
2726 loc = gsc_next_forwards(&it);
2727 }
2728
2729 if (anyFound) {
2730 d->n_groups--;
2731 }
2733 return outGroup;
2734
2735 } else {
// isDuplicate[k] marks list entries (offset from i) that repeat an earlier entry,
// so a group listed twice is only counted once when adjusting d->n_groups.
2736 GSC_CREATE_BUFFER(isDuplicate,_Bool,remaininglistlen);
2737 memset(isDuplicate, 0, sizeof(_Bool)*remaininglistlen);
2738 for (size_t ii = i; ii < list_len; ++ii) {
2739 for (size_t jj = ii+1; jj < list_len; ++jj) {
2740 if (grouplist[ii].num == grouplist[jj].num) {
2741 isDuplicate[jj-i] = 1;
2742 }
2743 }
2744 }
2745
// anyFound[k] records whether list entry i+k actually had members to move.
2746 GSC_CREATE_BUFFER(anyFound,_Bool,remaininglistlen);
2747 memset(anyFound, 0, sizeof(_Bool)*remaininglistlen);
2748
2750 gsc_GroupNum cachedgroup = GSC_NO_GROUP; // just for speedier lookups. Groups tend to be stored contiguous in most simulations.
2752
2753 while (GSC_IS_VALID_LOCATION(loc)) {
2754 if (gsc_get_group(loc).num == cachedgroup.num) {
2755 gsc_set_group(loc,outGroup);
2756 } else {
2757 for (size_t k = i+1; k < list_len; ++k) {
2758 if (gsc_get_group(loc).num == grouplist[k].num) {
2759 gsc_set_group(loc,outGroup);
2760 cachedgroup = grouplist[k];
2761 anyFound[k-i] = 1;
2762 break;
2763 }
2764 }
2765 }
2766
2767 loc = gsc_next_forwards(&it);
2768 }
2769
2770 size_t groupsgone = 0;
2771 for (size_t j = 0; j < remaininglistlen; ++j) {
2772 if (!isDuplicate[j] && anyFound[j]) {
2773 groupsgone++;
2774 }
2775 }
2776 d->n_groups -= groupsgone;
2778 GSC_DELETE_BUFFER(anyFound);
2779 GSC_DELETE_BUFFER(isDuplicate);
2780 return outGroup;
2781 }
2782}
2783
// Fragment: tail of the parameter list and body of the routine that moves the
// genotypes at the given global indexes into a newly numbered group (the first
// signature line and listing lines 2809 and 2823 -- which create and presumably
// delete the iterator `it` -- are not present in this excerpt).
// Returns the new group number if at least one index was valid (d->n_groups is then
// incremented), otherwise GSC_NO_GROUP. The count of invalid indexes is reported on
// stderr.
2801 const size_t index_list_len,
2802 const GSC_GLOBALX_T* genotype_indexes) {
2803 if (index_list_len < 1) {
2804 fprintf(stderr,"Invalid index_list_len value: length of allocation list must be at least 1\n");
2805 return GSC_NO_GROUP;
2806 }
2807
2808 gsc_GroupNum newGroup = gsc_get_new_group_num(d);
2810 size_t invalidLocations = 0;
2811 for (size_t i = 0; i < index_list_len; ++i) {
2812 gsc_GenoLocation loc = gsc_next_get_nth(&it, genotype_indexes[i]);
2813 if (GSC_IS_VALID_LOCATION(loc)) {
2814 gsc_set_group(loc,newGroup);
2815 } else {
2816 invalidLocations++;
2817 }
2818 }
2819
2820 if (invalidLocations > 0) {
2821 fprintf(stderr,"%lu indexes were invalid\n",(long unsigned int)invalidLocations);
2822 }
2824
// The group only comes into existence if at least one genotype was moved into it.
2825 if (invalidLocations < index_list_len) {
2826 d->n_groups++;
2827 return newGroup;
2828 } else {
2829 return GSC_NO_GROUP;
2830 }
2831}
2832
// Fragment: tail of the parameter list and body of the routine that moves every
// visited genotype whose custom label `whichLabel` equals `valueToSplit` into a
// newly numbered group (the first signature line and listing lines 2867-2868 --
// which create the iterator `it` and the first `loc` -- are not present in this
// excerpt; presumably the iteration is over `group` -- TODO confirm).
// Returns the new group number if any genotype matched (d->n_groups is then
// incremented), otherwise GSC_NO_GROUP. An unknown label is reported on stderr.
2855 const gsc_GroupNum group,
2856 const gsc_LabelID whichLabel,
2857 const int valueToSplit) {
2858 GSC_ID_T labelix;
// The assignment inside the condition resolves the label's storage index and
// rejects labels that do not exist in `d`.
2859 if (whichLabel.id == GSC_NO_LABEL.id || (labelix = gsc_get_index_of_label(d, whichLabel)) == GSC_NA_IDX) {
2860 fprintf(stderr, "Nonexistent label %lu\n", (long unsigned int)whichLabel.id);
2861 return GSC_NO_GROUP;
2862 }
2863
2864 gsc_GroupNum newGroup = gsc_get_new_group_num(d);
2865 _Bool anyFound = 0;
2866
2869 while (GSC_IS_VALID_LOCATION(loc)) {
2870 if (gsc_get_label_value(loc,labelix) == valueToSplit) {
2871 gsc_set_group(loc,newGroup);
2872 anyFound = 1;
2873 }
2874
2875 loc = gsc_next_forwards(&it);
2876 }
2877
2878 if (anyFound) {
2879 d->n_groups++;
2880 return newGroup;
2881 } else {
2882 return GSC_NO_GROUP;
2883 }
2884
2885}
2886
// Fragment: tail of the parameter list and body of the routine that moves every
// visited genotype whose custom label `whichLabel` lies in the inclusive range
// [valueLowBound, valueHighBound] into a newly numbered group (the first signature
// line and listing lines 2928-2929 -- which create the iterator `it` and the first
// `loc` -- are not present in this excerpt; presumably the iteration is over
// `group` -- TODO confirm).
// Returns the new group number if any genotype matched (d->n_groups is then
// incremented), otherwise GSC_NO_GROUP. An unknown label or an empty range
// (low > high) is reported on stderr.
2911 const gsc_GroupNum group,
2912 const gsc_LabelID whichLabel,
2913 const int valueLowBound,
2914 const int valueHighBound) {
2915 GSC_ID_T labelix;
2916 if (whichLabel.id == GSC_NO_LABEL.id || (labelix = gsc_get_index_of_label(d, whichLabel)) == GSC_NA_IDX) {
2917 fprintf(stderr, "Nonexistent label %lu\n", (long unsigned int)whichLabel.id);
2918 return GSC_NO_GROUP;
2919 }
2920 if (valueLowBound > valueHighBound) {
2921 fprintf(stderr, "Empty range %d to %d: no group created\n", valueLowBound, valueHighBound);
2922 return GSC_NO_GROUP;
2923 }
2924
2925 gsc_GroupNum newGroup = gsc_get_new_group_num(d);
2926 _Bool anyFound = 0;
2927
2930 while (GSC_IS_VALID_LOCATION(loc)) {
2931 if (gsc_get_label_value(loc,labelix) >= valueLowBound &&
2932 gsc_get_label_value(loc,labelix) <= valueHighBound) {
2933 gsc_set_group(loc,newGroup);
2934 anyFound = 1;
2935 }
2936
2937 loc = gsc_next_forwards(&it);
2938 }
2939
2940 if (anyFound) {
2941 d->n_groups++;
2942 return newGroup;
2943 } else {
2944 return GSC_NO_GROUP; // no values with that label
2945 }
2946}
2947
2948
// Fragment: tail of the parameter list and body of a generic "split a group by some
// quality" driver (the first signature line and listing lines 3007-3008 -- which
// create the iterator `it` and the first `loc` -- are not present in this excerpt).
// For each visited genotype the callback `somequality_tester` is asked which
// already-created subgroup it belongs to; a GSC_NO_GROUP answer means "new subgroup",
// and this driver then allocates the next free group number for it. Up to
// `maxentries_results` of the created group numbers are copied into `results`.
// Returns the number of subgroups created (0 if `group_id` has no members).
2971 const gsc_GroupNum group_id,
2972 void* somequality_data,
2973 gsc_GroupNum (*somequality_tester)(gsc_GenoLocation,
2974 void*,
2975 size_t,
2976 size_t,
2977 gsc_GroupNum*),
2978 size_t maxentries_results,
2979 gsc_GroupNum* results) {
2980 // Access existing groups (to be used to find unused group numbers,
2981 // and to find maximum number of groups we'd be able to create)
2982 GSC_CREATE_BUFFER(currentgroups,gsc_GroupNum,d->n_groups);
2983 GSC_CREATE_BUFFER(currentsizes,GSC_GLOBALX_T,d->n_groups);
2984 size_t n_groups = gsc_get_existing_group_counts(d, currentgroups, currentsizes);
2985 size_t bookmark = 0;
2986 gsc_GroupNum nextgroup = GSC_NO_GROUP;
2987
2988 // splitgroupsize is size_t not GLOBALX_T because it will be used as the maximum number of output
2989 // groups that could be produced, not used to operate on candidates in the group. (By default though
2990 // GSC_GLOBALX_T is an alias of size_t so it makes no difference).
2991 size_t splitgroupsize = 0;
2992 for (size_t i = 0; i < n_groups; ++i) {
2993 if (currentgroups[i].num == group_id.num) {
2994 splitgroupsize = currentsizes[i];
2995 //GSC_FREE(currentsizes);
2996 break;
2997 }
2998 }
// NOTE(review): this early return happens before the GSC_DELETE_BUFFER calls for
// currentgroups and currentsizes below; if GSC_CREATE_BUFFER can heap-allocate,
// both buffers leak on this path -- confirm against the macro definition.
2999 if (splitgroupsize == 0) {
3000 return 0;
3001 }
3002
3003 GSC_DELETE_BUFFER(currentsizes);
3004 size_t subgroupsfound = 0;
// At most one subgroup per member, so the group's size bounds the output list.
3005 GSC_CREATE_BUFFER(outgroups,gsc_GroupNum,splitgroupsize);
3006
3009 while (GSC_IS_VALID_LOCATION(loc)) {
3010 // Return group number if it should be assigned to an already-extant group. Otherwise return GSC_NO_GROUP and this generic caller function will allocated it one.
3011 gsc_GroupNum assignedgroup = somequality_tester(loc, somequality_data,
3012 splitgroupsize, subgroupsfound, outgroups);
3013
3014 if (assignedgroup.num == GSC_NO_GROUP.num) {
3015 nextgroup = gsc_get_next_free_group_num(n_groups,currentgroups,&bookmark,nextgroup);
3016 assignedgroup = nextgroup;
3017 outgroups[subgroupsfound] = nextgroup;
3018 subgroupsfound++;
3019 }
3020
3021 gsc_set_group(loc,assignedgroup);
3022
3023 loc = gsc_next_forwards(&it);
3024 }
3025
3026 GSC_DELETE_BUFFER(currentgroups);
// The original group is replaced by `subgroupsfound` new ones, hence the "- 1".
3027 d->n_groups += subgroupsfound - 1;
3028
3029 if (maxentries_results < subgroupsfound) {
3030 memcpy(results,outgroups,sizeof(gsc_GroupNum)*maxentries_results);
3031 fprintf(stderr,"Output vector size is not large enough to hold all created groups: "
3032 " output list of gsc_GroupNums has been truncated\n");
3033 } else {
3034 memcpy(results,outgroups,sizeof(gsc_GroupNum)*subgroupsfound);
3035 }
3036 GSC_DELETE_BUFFER(outgroups);
3037 return subgroupsfound;
3038}
3039
3041 void* datastore,
3042 size_t maxgroups,
3043 size_t groupsfound,
3044 gsc_GroupNum* results,
3045 gsc_PedigreeID (*getparent)(gsc_GenoLocation)) {
3046 gsc_PedigreeID* familyidentities = (gsc_PedigreeID*) datastore;
3047
3048 for (size_t j = 0; j < groupsfound; ++j) {
3049 if (getparent(loc).id == familyidentities[j].id) {
3050 return results[j];
3051 }
3052 }
3053
3054 if (groupsfound > maxgroups) {
3055 fprintf(stderr, "Attempted to split into more groups than caller deemed possible. "
3056 "There is a bug in the simulation tool if you can reach this state.");
3057 return results[maxgroups-1]; // allocate all to the last group, possibly incorrectly.
3058 }
3059
3060 familyidentities[groupsfound] = getparent(loc);
3061 return GSC_NO_GROUP;
3062}
3063
// Fragment: thin adapter with the split-by-quality callback signature that forwards
// to gsc_helper_split_by_quality_halfsibtemplate (the first signature line and
// listing line 3070, which supplies the final `getparent` argument, are not present
// in this excerpt -- which parent accessor is passed cannot be seen here).
3065 void* datastore,
3066 size_t maxgroups,
3067 size_t groupsfound,
3068 gsc_GroupNum* results) {
3069 return gsc_helper_split_by_quality_halfsibtemplate(loc,datastore,maxgroups,groupsfound,results,
3071}
3072
// Fragment: second thin adapter with the split-by-quality callback signature that
// forwards to gsc_helper_split_by_quality_halfsibtemplate (the first signature line
// and listing line 3079, which supplies the final `getparent` argument, are not
// present in this excerpt -- which parent accessor is passed cannot be seen here).
3074 void* datastore,
3075 size_t maxgroups,
3076 size_t groupsfound,
3077 gsc_GroupNum* results) {
3078 return gsc_helper_split_by_quality_halfsibtemplate(loc,datastore,maxgroups,groupsfound,results,
3080}
3081
3116 const gsc_GroupNum group_id,
3117 const int parent,
3118 size_t maxentries_results,
3119 gsc_GroupNum* results) {
3120 if (!(parent == 1 || parent == 2)) {
3121 fprintf(stderr, "Value error: `parent` must be 1 or 2.");
3122 results = NULL;
3123 return 0;
3124 }
3125
3126 //gsc_PedigreeID* familyidentities = gsc_malloc_wrap(sizeof(gsc_PedigreeID)*maxgroups);
3127 GSC_GLOBALX_T maxgroups = gsc_get_group_size(d, group_id); // sadinefficient we have to do this
3128 GSC_CREATE_BUFFER(familyidentities,gsc_PedigreeID,maxgroups);
3129
3130 size_t gcount;
3131 if (parent == 1) {
3132 gcount = gsc_scaffold_split_by_somequality(d, group_id, (void*)familyidentities,
3134 maxentries_results, results);
3135 } else {
3136 gcount = gsc_scaffold_split_by_somequality(d, group_id, (void*)familyidentities,
3138 maxentries_results, results);
3139 }
3140
3141 GSC_DELETE_BUFFER(familyidentities);
3142 return gcount;
3143}
3144
3146 void* datastore,
3147 size_t maxgroups,
3148 size_t groupsfound,
3149 gsc_GroupNum* results) {
3150 gsc_PedigreeID** familyidentities = (gsc_PedigreeID**) datastore;
3151
3152 for (size_t j = 0; j < groupsfound; ++j) {
3153 if (gsc_get_first_parent(loc).id == familyidentities[0][j].id &&
3154 gsc_get_second_parent(loc).id == familyidentities[1][j].id) {
3155 return results[j];
3156 }
3157 }
3158
3159 if (groupsfound > maxgroups) {
3160 fprintf(stderr, "Attempted to split into more groups than caller deemed possible. "
3161 "There is a bug in the simulation tool if you can reach this state.");
3162 return results[maxgroups-1]; // allocate all to the last group, possibly incorrectly.
3163 }
3164
3165 familyidentities[0][groupsfound] = gsc_get_first_parent(loc);
3166 familyidentities[1][groupsfound] = gsc_get_second_parent(loc);
3167 return GSC_NO_GROUP;
3168}
3169
3198 const gsc_GroupNum group_id,
3199 size_t maxentries_results,
3200 gsc_GroupNum* results) {
3201 gsc_PedigreeID* familyidentities[2];
3202 GSC_GLOBALX_T maxgroups = gsc_get_group_size(d, group_id); // sadinefficient we have to do this
3203 if (maxgroups < 2) {
3204 return 0;
3205 }
3206
3207 GSC_CREATE_BUFFER(p1identity,gsc_PedigreeID,maxgroups);
3208 GSC_CREATE_BUFFER(p2identity,gsc_PedigreeID,maxgroups);
3209 familyidentities[0] = p1identity;
3210 familyidentities[1] = p2identity;
3211
3212 size_t out = gsc_scaffold_split_by_somequality(d, group_id, (void*)familyidentities,
3214 maxentries_results, results);
3215
3216 GSC_DELETE_BUFFER(p1identity);
3217 GSC_DELETE_BUFFER(p2identity);
3218
3219 return out;
3220}
3221
3223 void* datastore,
3224 size_t maxgroups,
3225 size_t groupsfound,
3226 gsc_GroupNum* results) {
3227 return GSC_NO_GROUP;
3228}
3229
3255 const gsc_GroupNum group_id,
3256 size_t maxentries_results,
3257 gsc_GroupNum* results) {
3258 // **individuate** (verb): to make individuals of.
3259 // yeah sorry.
3260 return gsc_scaffold_split_by_somequality(d, group_id, NULL,
3262 maxentries_results, results);
3263}
3264
3265
3289 const gsc_GroupNum group_id) {
3290 // get the shuffle to be our even allocations
3291 GSC_GLOBALX_T size = gsc_get_group_size(d, group_id);
3292 if (size < 2) {
3293 if (size < 1) {
3294 fprintf(stderr,"Group %lu does not exist\n", (long unsigned int) group_id.num);
3295 } else {
3296 fprintf(stderr,"Group %lu has only one member so can't be split\n", (long unsigned int) group_id.num);
3297 }
3298 return GSC_NO_GROUP;
3299 }
3300
3301 GSC_GLOBALX_T even_half = size / 2;
3302 GSC_CREATE_BUFFER(allocations,GSC_GLOBALX_T,size);
3303 for (GSC_GLOBALX_T i = 0; i < size; ++i) {
3304 allocations[i] = i;
3305 }
3306 gsc_shuffle_up_to(&d->rng, allocations, sizeof(allocations[0]), size, even_half);
3307
3308 gsc_GroupNum new_group = gsc_get_new_group_num(d);
3309
3311 for (GSC_GLOBALX_T i = 0; i < even_half; ++i) {
3312 gsc_GenoLocation loc = gsc_next_get_nth(&it,allocations[i]);
3313 if (GSC_IS_VALID_LOCATION(loc)) {
3314 gsc_set_group(loc,new_group);
3315 }
3316 }
3317
3318 GSC_DELETE_BUFFER(allocations);
3320
3321 d->n_groups++;
3322 return new_group;
3323}
3324
3325
3344 const gsc_GroupNum group_id,
3345 void* someallocator_data,
3346 gsc_GroupNum (*someallocator)(gsc_GenoLocation,
3347 gsc_SimData*,
3348 void*,
3349 size_t,
3350 size_t*,
3351 gsc_GroupNum*),
3352 size_t n_outgroups,
3353 gsc_GroupNum* outgroups) {
3354
3355 // get the n group numbers
3356 gsc_get_n_new_group_nums(d, n_outgroups, outgroups);
3357
3358 size_t subgroupsfound = 0;
3359 GSC_GLOBALX_T allocationfailures = 0;
3360
3363 while (GSC_IS_VALID_LOCATION(loc)) {
3364 gsc_GroupNum assignedgroup = someallocator(loc, d, someallocator_data,
3365 n_outgroups, &subgroupsfound, outgroups);
3366 if (assignedgroup.num != GSC_NO_GROUP.num) {
3367 gsc_set_group(loc,assignedgroup);
3368 } else {
3369 allocationfailures++;
3370 }
3371
3372 loc = gsc_next_forwards(&it);
3373 }
3374
3375 if (subgroupsfound > 1) {
3376 d->n_groups += subgroupsfound - 1;
3377 }
3378 if (allocationfailures > 0) {
3379 fprintf(stderr,"While splitting group %lu, %lu allocations to new groups failed so they remain"
3380 " in the original group\n",
3381 (long unsigned int) group_id.num, (long unsigned int) allocationfailures);
3382 }
3383 return subgroupsfound;
3384
3385}
3386
3387
3389 gsc_SimData* d,
3390 void* datastore,
3391 size_t n_outgroups,
3392 size_t* subgroupsfound,
3393 gsc_GroupNum* outgroups) {
3394 GSC_GLOBALX_T* cumulative_counts = (GSC_GLOBALX_T*) datastore;
3395 *subgroupsfound = n_outgroups;
3396 // type note: may misbehave with large numbers because is just designed for ints
3397 int randpos = rnd_pcg_range(&d->rng,0,cumulative_counts[n_outgroups-1] - 1);
3398
3399 gsc_GroupNum chosengroup = GSC_NO_GROUP;
3400 size_t j = 0;
3401 for (; j < n_outgroups; ++j) {
3402 if (randpos < cumulative_counts[j]) {
3403 chosengroup = outgroups[j];
3404 break;
3405 }
3406 }
3407 for (; j < n_outgroups; ++j) {
3408 cumulative_counts[j]--;
3409 }
3410 return chosengroup;
3411}
3412
3435 const gsc_GroupNum group_id,
3436 const size_t n,
3437 gsc_GroupNum* results) {
3438 if (n <= 1) {
3439 fprintf(stderr, "Invalid n value: number of fractions into which to split group must be at least 2\n");
3440 return 0;
3441 }
3442
3443 GSC_GLOBALX_T size = gsc_get_group_size(d, group_id); // sadinefficient we have to do this.
3444
3445 // get the shuffle to be our even allocations
3446 GSC_GLOBALX_T each_size = size / n;
3447 GSC_GLOBALX_T extra = size % n;
3449 for (size_t i = 0; i < n; ++i) {
3450 boxes[i] = each_size;
3451 if (i < extra) {
3452 boxes[i]++;
3453 }
3454 if (i > 0) {
3455 boxes[i] += boxes[i-1];
3456 }
3457 }
3458
3459 size_t out;
3460 if (results == NULL) {
3461 GSC_CREATE_BUFFER(outgroups,gsc_GroupNum, n);
3462 out = gsc_scaffold_split_by_someallocation(d, group_id, (void*) boxes,
3464 n, outgroups);
3465 GSC_DELETE_BUFFER(outgroups);
3466 } else {
3467 out = gsc_scaffold_split_by_someallocation(d, group_id, (void*) boxes,
3469 n, results);
3470 }
3471 GSC_DELETE_BUFFER(boxes);
3472 return out;
3473}
3474
3512 const gsc_GroupNum group_id,
3513 const size_t n,
3514 const GSC_GLOBALX_T* counts,
3515 gsc_GroupNum* results) {
3516 if (n <= 1) {
3517 fprintf(stderr, "Invalid n value: number of fractions into which to split group must be at least 2\n");
3518 return 0;
3519 }
3520
3521 GSC_CREATE_BUFFER(cumulative_counts,GSC_GLOBALX_T,n);
3522 cumulative_counts[n-1] = gsc_get_group_size(d, group_id);
3523 GSC_GLOBALX_T sum = 0;
3524 for (size_t j = 0; j < n - 1; ++j) {
3525 sum += counts[j];
3526 cumulative_counts[j] = sum;
3527 }
3528 if (cumulative_counts[n-2] > cumulative_counts[n-1]) {
3529 fprintf(stderr, "Provided capacities are larger than actual group: some buckets will not be filled\n");
3530 }
3531
3532 size_t gcount;
3533 if (results == NULL) {
3534 GSC_CREATE_BUFFER(outgroups,gsc_GroupNum,n);
3535 gcount = gsc_scaffold_split_by_someallocation(d, group_id, (void*) cumulative_counts,
3537 n, outgroups);
3538 GSC_DELETE_BUFFER(outgroups);
3539 } else {
3540 gcount = gsc_scaffold_split_by_someallocation(d, group_id, (void*) cumulative_counts,
3542 n, results);
3543 }
3544
3545 GSC_DELETE_BUFFER(cumulative_counts);
3546 return gcount;
3547}
3548
3571 const gsc_GroupNum group_id) {
3572 gsc_GroupNum outGroup = gsc_get_new_group_num(d);
3573 _Bool anyFound = 0;
3574
3577 while (GSC_IS_VALID_LOCATION(loc)) {
3578 anyFound = 1;
3579 if (rnd_pcg_range(&d->rng,0,1)) {
3580 gsc_set_group(loc,outGroup);
3581 }
3582 loc = gsc_next_forwards(&it);
3583 }
3585
3586 if (anyFound) {
3587 d->n_groups++;
3588 return outGroup;
3589 } else {
3590 return GSC_NO_GROUP;
3591 }
3592}
3593
3594
3596 gsc_SimData* d,
3597 void* datastore,
3598 size_t n_outgroups,
3599 size_t* subgroupsfound,
3600 gsc_GroupNum* outgroups) {
3601 // consideration: will be an issue in C version if n_outgroups > INT_MAX.
3602 size_t randgroup = rnd_pcg_range(&d->rng,0,n_outgroups-1);
3603 if (randgroup < *subgroupsfound) {
3604 return outgroups[randgroup];
3605 } else {
3606 (*subgroupsfound)++;
3607 return outgroups[*subgroupsfound-1];
3608 }
3609}
3610
3636 const gsc_GroupNum group_id,
3637 const size_t n,
3638 gsc_GroupNum* results) {
3639 if (n <= 1) {
3640 fprintf(stderr, "Invalid n value: number of fractions in which to split group must be at least 2\n");
3641 return 0;
3642 }
3643
3644 size_t gcount;
3645 if (results == NULL) {
3646 GSC_CREATE_BUFFER(outgroups,gsc_GroupNum,n);
3647 gcount = gsc_scaffold_split_by_someallocation(d, group_id, NULL,
3649 n, outgroups);
3650 GSC_DELETE_BUFFER(outgroups);
3651 } else {
3652 gcount = gsc_scaffold_split_by_someallocation(d, group_id, NULL,
3654 n, results);
3655 }
3656 return gcount;
3657}
3658
3659
3661 gsc_SimData* d,
3662 void* datastore,
3663 size_t n_outgroups,
3664 size_t* subgroupsfound,
3665 gsc_GroupNum* outgroups) {
3666 double* cumulative_probs = (double*) datastore;
3667 *subgroupsfound = n_outgroups;
3668 double randdraw = rnd_pcg_nextf(&d->rng);
3669 for (size_t j = 0; j < n_outgroups; ++j) {
3670 if (randdraw < cumulative_probs[j]) {
3671 return outgroups[j];
3672 }
3673 }
3674 // This should not happen if cumulative probs are valid
3675 return GSC_NO_GROUP;
3676}
3677
3711 const gsc_GroupNum group_id,
3712 const size_t n,
3713 const double* probs,
3714 gsc_GroupNum* results) {
3715 if (n <= 1) {
3716 fprintf(stderr, "Invalid n value: number of fractions in which to split group must be at least 2\n");
3717 return 0;
3718 }
3719
3720 // Check the probabilities
3721 GSC_CREATE_BUFFER(cumulative_probs,double,n);
3722 cumulative_probs[n-1] = 1.0;
3723 double sum = 0;
3724 for (size_t j = 0; j < n-1; ++j) {
3725 sum += probs[j];
3726 cumulative_probs[j] = sum;
3727 if (cumulative_probs[j] >= 1) {
3728 fprintf(stderr, "Provided probabilities add up to 1 or more: some buckets will not be filled\n");
3729 for (; j < n-1; ++j) {
3730 cumulative_probs[j] = 1;
3731 }
3732 //don't bother to calculate more
3733 break;
3734 }
3735 }
3736
3737 size_t gcount;
3738 if (results == NULL) {
3739 GSC_CREATE_BUFFER(outgroups,gsc_GroupNum,n);
3740 gcount = gsc_scaffold_split_by_someallocation(d, group_id, (void*) cumulative_probs,
3742 n, outgroups);
3743 GSC_DELETE_BUFFER(outgroups);
3744 } else {
3745 gcount = gsc_scaffold_split_by_someallocation(d, group_id, (void*) cumulative_probs,
3747 n, results);
3748 }
3749
3750 GSC_DELETE_BUFFER(cumulative_probs);
3751 return gcount;
3752}
3753
3754
3777 return gsc_get_existing_group_counts(d, output, NULL);
3778}
3779
3797 GSC_CREATE_BUFFER(buckets,GSC_GLOBALX_T,d->n_groups+1); // this also creates bucketscap, initalised to d->n_groups+1.
3798 memset(buckets,0,sizeof(GSC_GLOBALX_T)*bucketscap);
3799 size_t filledbuckets = 0;
3800
3803 while (GSC_IS_VALID_LOCATION(loc)) {
3804 gsc_GroupNum g = gsc_get_group(loc);
3805 // Unless all group numbers are consecutive starting at 1, the buckets array will need to be resized at some point.
3806 if (g.num >= bucketscap) {
3807 size_t oldcap = bucketscap;
3808 size_t newbucketcapacity = bucketscap;
3809 while (g.num >= newbucketcapacity) {
3810 newbucketcapacity *= 2;
3811 }
3812 GSC_STRETCH_BUFFER(buckets,newbucketcapacity);
3813 if (g.num >= bucketscap) {
3814 fprintf(stderr,"Memory allocation failed. Not all groups found\n");
3815 break;
3816 }
3817 memset(buckets+oldcap,0,sizeof(GSC_GLOBALX_T)*(bucketscap-oldcap));
3818
3819 }
3820
3821 buckets[g.num] += 1;
3822 if (buckets[g.num] == 1) {
3823 ++filledbuckets;
3824 }
3825
3826 loc = gsc_next_forwards(&it);
3827 }
3828
3829 // Now save to output and sort.
3830 size_t capacity = filledbuckets;
3831 if (capacity > d->n_groups) {
3832 fprintf(stderr,"Found more groups than expected - gsc_SimData.n_groups is outdated somewhere."
3833 " Trimming output of get_existing_group_ to avoid a crash: not all groups may be shown\n");
3834 capacity = d->n_groups;
3835 }
3836 size_t g_index = 0;
3837 for (size_t i = 1; i < bucketscap; ++i) {
3838 if (buckets[i]) {
3839 /*if (g_index >= capacity) {
3840 fprintf(stderr,"Found more groups than just a moment ago.");
3841 --g_index;
3842 break;
3843 }*/
3844
3845 if (out_groups != NULL) {
3846 out_groups[g_index] = (gsc_GroupNum){.num=i};
3847 }
3848 if (out_sizes != NULL) {
3849 out_sizes[g_index] = buckets[i];
3850 }
3851 ++g_index;
3852 }
3853 }
3854
3855 //qsort(*out_groups, g_index, sizeof(gsc_GroupNum), gsc_helper_ascending_gsc_GroupNum_comparer);
3856 /*for (int i = 0; i < g_index; ++i) {
3857 (*out_sizes)[i] = buckets[(*out_groups)[i].num];
3858 }*/
3859 GSC_DELETE_BUFFER(buckets);
3860 d->n_groups = g_index;
3861
3862 return g_index;
3863}
3864
3865
3889gsc_GroupNum gsc_get_next_free_group_num(const size_t n_existing_groups,
3890 const gsc_GroupNum* existing_groups,
3891 size_t* cursor,
3892 gsc_GroupNum previous) {
3893 if (existing_groups == NULL) return GSC_NO_GROUP;
3894
3895 gsc_GroupNum nextgroup = (gsc_GroupNum){.num=previous.num+1};
3896 // a check here in case previous seems invalid. We need previous so we don't get stuck in a loop
3897 // of giving the same next 'free' number, but we know what a lower bound on its number should be
3898 // based on where the cursor is.
3899 if (*cursor > 0 && nextgroup.num <= existing_groups[(*cursor) - 1].num) {
3900 nextgroup.num = existing_groups[(*cursor) - 1].num + 1;
3901 }
3902
3903 while (*cursor < n_existing_groups) {
3904 if (nextgroup.num < existing_groups[*cursor].num) {
3905 break;
3906 }
3907
3908 ++(*cursor);
3909 ++nextgroup.num;
3910 }
3911 return nextgroup;
3912}
3913
3928 // Make sure we get all existing groups
3929 if (d->m == NULL || (d->m->n_genotypes == 0 && d->m->next == NULL)) {
3930 return (gsc_GroupNum){.num=1};
3931 }
3932
3933 size_t n_groups = (d->n_groups > 0) ? d->n_groups : 5;
3934 GSC_CREATE_BUFFER(existing_groups,gsc_GroupNum,n_groups);
3935 n_groups = gsc_get_existing_groups(d, existing_groups);
3936
3937 size_t i = 0;
3938 GSC_ID_T gn = 1;
3939
3940 while (i < n_groups) {
3941 if (gn < existing_groups[i].num) {
3942 break;
3943 }
3944
3945 ++i;
3946 ++gn;
3947 }
3948 GSC_DELETE_BUFFER(existing_groups);
3949 return (gsc_GroupNum){.num=gn};
3950}
3951
3952
3962 const size_t n,
3963 gsc_GroupNum* result) {
3964 // Make sure we get all existing groups
3965 size_t n_groups;
3966 GSC_CREATE_BUFFER(existing_groups,gsc_GroupNum,d->n_groups);
3967 n_groups = gsc_get_existing_groups(d, existing_groups);
3968
3969 size_t existingi = 0;
3970 GSC_ID_T gn = 0;
3971
3972 // i: current index of `results` (the array of currently empty group numbers)
3973 // gn: group number being checked against existing_groups. if not in there is added to
3974 // the list of results
3975 // existingi: current index of existing_groups
3976 for (size_t i = 0; i < n; ++i) {
3977 ++gn;
3978 while (existingi < n_groups) {
3979 if (gn < existing_groups[existingi].num) {
3980 break;
3981 }
3982
3983 ++existingi;
3984 ++gn;
3985 }
3986 result[i] = (gsc_GroupNum){.num=gn};
3987 }
3988 GSC_DELETE_BUFFER(existing_groups);
3989}
3990
3999 // label_ids must be in sequential order
4000 gsc_LabelID new = {.id=1};
4001 GSC_ID_T i = 0;
4002
4003 while (i < d->n_labels) {
4004 if (new.id < d->label_ids[i].id) {
4005 break;
4006 }
4007
4008 ++i;
4009 ++(new.id);
4010 }
4011
4012 return new;
4013}
4014
4023 // label_ids must be in sequential order
4024 gsc_EffectID new = { .id=1 };
4025 GSC_ID_T i = 0;
4026
4027 while (i < d->n_eff_sets) {
4028 if (new.id < d->eff_set_ids[i].id) {
4029 break;
4030 }
4031
4032 ++i;
4033 ++(new.id);
4034 }
4035
4036 return new;
4037}
4038
4047 // map IDs must be in sequential order
4048 gsc_MapID new = { .id=1 };
4049 GSC_ID_T i = 0;
4050
4051 while (i < d->genome.n_maps) {
4052 if (new.id < d->genome.map_ids[i].id) {
4053 break;
4054 }
4055
4056 ++i;
4057 ++(new.id);
4058 }
4059
4060 return new;
4061}
4062
4063
4073 if (d->n_labels == 0) { return GSC_NA_IDX; } // immediate fail
4074 if (d->n_labels == 1) { return (d->label_ids[0].id == label.id) ? 0 : GSC_NA_IDX ; }
4075 if (label.id == GSC_NO_LABEL.id) { return GSC_NA_IDX; }
4076
4077 // If there's at least two labels then we binary search.
4078 GSC_ID_T first = 0;
4079 GSC_ID_T last = d->n_labels;
4080 GSC_ID_T mid;
4081
4082 while (first <= last) {
4083 mid = (first + last) / 2;
4084
4085 if (d->label_ids[mid].id == label.id) {
4086 return mid;
4087 } else if (d->label_ids[mid].id < label.id) {
4088 first = mid + 1;
4089 } else {
4090 last = mid - 1;
4091 }
4092
4093 }
4094
4095 return GSC_NA_IDX;
4096}
4097
4106 if (d->n_eff_sets == 0) { return GSC_NA_IDX; } // immediate fail
4107 if (d->n_eff_sets == 1) { return (d->eff_set_ids[0].id == eff_set_id.id) ? 0 : GSC_NA_IDX ; }
4108 if (eff_set_id.id == GSC_NO_EFFECTSET.id) { return GSC_NA_IDX; }
4109
4110 // If there's at least two labels then we binary search.
4111 GSC_ID_T first = 0;
4112 GSC_ID_T last = d->n_eff_sets;
4113 GSC_ID_T mid;
4114
4115 while (first <= last) {
4116 mid = (first + last) / 2;
4117
4118 if (d->eff_set_ids[mid].id == eff_set_id.id) {
4119 return mid;
4120 } else if (d->eff_set_ids[mid].id < eff_set_id.id) {
4121 first = mid + 1;
4122 } else {
4123 last = mid - 1;
4124 }
4125
4126 }
4127
4128 return GSC_NA_IDX;
4129}
4130
4139 if (d->genome.n_maps == 0) { return GSC_NA_IDX; } // immediate fail
4140 if (d->genome.n_maps == 1) { return (d->genome.map_ids[0].id == map.id) ? 0 : GSC_NA_IDX ; }
4141 if (map.id == GSC_NO_MAP.id) { return GSC_NA_IDX; }
4142
4143 // If there's at least two labels then we binary search.
4144 GSC_ID_T first = 0;
4145 GSC_ID_T last = d->genome.n_maps;
4146 GSC_ID_T mid;
4147
4148 while (first <= last) {
4149 mid = (first + last) / 2;
4150
4151 if (d->genome.map_ids[mid].id == map.id) {
4152 return mid;
4153 } else if (d->genome.map_ids[mid].id < map.id) {
4154 first = mid + 1;
4155 } else {
4156 last = mid - 1;
4157 }
4158
4159 }
4160
4161 return GSC_NA_IDX;
4162}
4163
4164//-----------------------------------Data Access-----------------------------------
4165
4181 if (group_id.num == GSC_NO_GROUP.num) {
4182 return 0; // it is not a group so it does not have a size
4183 }
4184 const gsc_AlleleMatrix* m = d->m;
4185 GSC_GLOBALX_T size = 0;
4186 while (m != NULL) {
4187 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
4188 if (m->groups[i].num == group_id.num) {
4189 ++size;
4190 }
4191 }
4192
4193 m = m->next;
4194 }
4195 return size;
4196}
4197
4216 const gsc_GroupNum group_id,
4217 GSC_GLOBALX_T group_size,
4218 char** output) {
4219 const gsc_AlleleMatrix* m = d->m;
4220 GSC_GLOBALX_T outix = 0;
4221 while (m != NULL) {
4222 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
4223 if (m->groups[i].num == group_id.num) {
4224 output[outix] = m->alleles[i];
4225 ++outix;
4226 if (outix == group_size) {
4227 return outix;
4228 }
4229 }
4230 }
4231
4232 m = m->next;
4233 }
4234 return outix;
4235}
4236
4254 const gsc_GroupNum group_id,
4255 GSC_GLOBALX_T group_size,
4256 char** output) {
4257 const gsc_AlleleMatrix* m = d->m;
4258 GSC_GLOBALX_T outix = 0;
4259 while (m != NULL) {
4260 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
4261 if (m->groups[i].num == group_id.num) {
4262 output[outix] = m->names[i];
4263 ++outix;
4264 if (outix == group_size) {
4265 return outix;
4266 }
4267 }
4268 }
4269
4270 m = m->next;
4271 }
4272 return outix;
4273}
4274
4292 const gsc_GroupNum group_id,
4293 GSC_GLOBALX_T group_size,
4294 gsc_PedigreeID *output) {
4295 const gsc_AlleleMatrix* m = d->m;
4296 GSC_GLOBALX_T outix = 0;
4297 while (m != NULL) {
4298 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
4299 if (m->groups[i].num == group_id.num) {
4300 output[outix] = m->ids[i];
4301 ++outix;
4302 if (outix == group_size) {
4303 return outix;
4304 }
4305 }
4306 }
4307
4308 m = m->next;
4309 }
4310 return outix;
4311}
4312
4330 const gsc_GroupNum group_id,
4331 GSC_GLOBALX_T group_size,
4332 GSC_GLOBALX_T* output) {
4333 const gsc_AlleleMatrix* m = d->m;
4334 GSC_GLOBALX_T total_i = 0, outix = 0;
4335 while (m != NULL) {
4336 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i, ++total_i) {
4337 if (m->groups[i].num == group_id.num) {
4338 output[outix] = total_i;
4339 ++outix;
4340 if (outix == group_size) {
4341 return outix;
4342 }
4343 }
4344 }
4345
4346 m = m->next;
4347 }
4348 return outix;
4349}
4350
4369 const gsc_GroupNum group_id,
4370 const gsc_EffectID effID,
4371 GSC_GLOBALX_T group_size,
4372 double* output) {
4373 gsc_DecimalMatrix dm_bvs = gsc_calculate_bvs(d, group_id, effID );
4374
4375 if (group_size == 0 || group_size == GSC_NA_GLOBALX) {
4376 group_size = dm_bvs.dim2;
4377 }
4378
4379 memcpy(output, dm_bvs.matrix[0], sizeof(*output)*group_size);
4380
4381 gsc_delete_dmatrix(&dm_bvs);
4382 return group_size;
4383}
4384
4405 const gsc_GroupNum group_id,
4406 GSC_GLOBALX_T group_size,
4407 const int whichParent,
4408 gsc_PedigreeID* output) {
4409 if (!(whichParent == 1 || whichParent == 2)) {
4410 fprintf(stderr, "Value error: `parent` must be 1 or 2.");
4411 return GSC_NA_GLOBALX;
4412 }
4413 int parent = whichParent - 1;
4414
4415 const gsc_AlleleMatrix* m = d->m;
4416 GSC_GLOBALX_T outix = 0;
4417 while (m != NULL) {
4418 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
4419 if (m->groups[i].num == group_id.num) {
4420 output[outix] = m->pedigrees[parent][i];
4421 ++outix;
4422 if (outix == group_size) {
4423 return outix;
4424 }
4425 }
4426 }
4427
4428 m = m->next;
4429 }
4430 return outix;
4431}
4432
4453 const gsc_GroupNum group_id,
4454 GSC_GLOBALX_T group_size,
4455 const int whichParent,
4456 char** output) {
4457 if (!(whichParent == 1 || whichParent == 2)) {
4458 fprintf(stderr, "Value error: `parent` must be 1 or 2.");
4459 return GSC_NA_GLOBALX;
4460 }
4461 int parent = whichParent - 1;
4462
4463 const gsc_AlleleMatrix* m = d->m;
4464 GSC_GLOBALX_T outix = 0;
4465 while (m != NULL) {
4466 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
4467 if (m->groups[i].num == group_id.num) {
4468 if (m->pedigrees[parent][i].id != GSC_NO_PEDIGREE.id) {
4469 output[outix] = gsc_get_name_of_id(d->m, m->pedigrees[parent][i]);
4470 } else {
4471 output[outix] = NULL;
4472 }
4473 ++outix;
4474 if (outix == group_size) {
4475 return outix;
4476 }
4477 }
4478 }
4479
4480 m = m->next;
4481 }
4482 return outix;
4483}
4484
4515 const gsc_GroupNum group_id,
4516 GSC_GLOBALX_T group_size,
4517 char** output) {
4518 char* fname = "gS_gpptmp";
4519 gsc_save_pedigrees(fname,d,group_id,GSC_TRUE);
4520
4521 FILE* fp2;
4522 if ((fp2 = fopen(fname, "r")) == NULL) {
4523 fprintf(stderr, "Failed to use temporary file\n");
4524 return GSC_NA_GLOBALX;
4525 }
4526
4527 // Create the list that we will return
4528 if (group_size == 0 || group_size == GSC_NA_GLOBALX) {
4529 group_size = gsc_get_group_size( d, group_id );
4530 if (group_size == 0) { return 0; }
4531 }
4532
4533 // read one line at a time
4534 //size_t n;
4535 //int line_len;
4536 unsigned int size;
4537 unsigned int index;
4538 int nextc;
4539 for (GSC_GLOBALX_T i = 0; i < group_size; ++i) {
4540 // getline is not available in MinGW it looks like (AUG 2021)
4541 /*gp_ped[i] = NULL;
4542 if ((line_len = getline(&(gp_ped[i]), &n, fp2)) == -1) {
4543 error("Failed to get %d th pedigree\n", i);
4544 }
4545 // remove the newline character
4546 if (gp_ped[i][line_len - 1] == '\n') {
4547 gp_ped[i][line_len - 1] = '\0';
4548 }*/
4549
4550 // a not-very-size-efficient, fgets-based line getter
4551 size = 50;
4552 index = 0;
4553 output[i] = gsc_malloc_wrap(sizeof(char) * size,GSC_TRUE);
4554 while ((nextc = fgetc(fp2)) != '\n' && nextc != EOF) {
4555 output[i][index] = nextc;
4556 ++index;
4557
4558 if (index >= size) {
4559 size *= 2;
4560 char* temp = realloc(output[i], sizeof(char) * size);
4561 if (temp == NULL) {
4562 GSC_FREE(output[i]);
4563 fprintf(stderr, "Memory allocation of size %u failed.\n", size);
4564 output[i] = NULL;
4565 } else {
4566 output[i] = temp;
4567 }
4568 }
4569 }
4570 output[i][index] = '\0';
4571 }
4572
4573 fclose(fp2);
4574 remove(fname);
4575
4576 return group_size;
4577}
4578
4579/*---------------------- matrix-operations.c dregs -------------------*/
4580
4588gsc_DecimalMatrix gsc_generate_zero_dmatrix(const size_t r, const size_t c) {
4589 gsc_DecimalMatrix zeros;
4590 zeros.dim1 = r;
4591 zeros.dim2 = c;
4592
4593 if (r > 0) {
4594 zeros.matrix = gsc_malloc_wrap(sizeof(*zeros.matrix) * r,GSC_TRUE);
4595 if (c > 0) {
4596 for (size_t i = 0; i < r; ++i) {
4597 zeros.matrix[i] = gsc_malloc_wrap(sizeof(*(zeros.matrix[i])) * c,GSC_TRUE);
4598 for (size_t j = 0; j < c; ++j) {
4599 zeros.matrix[i][j] = 0.0;
4600 }
4601 }
4602 } else {
4603 for (size_t i = 0; i < r; ++i) {
4604 zeros.matrix[i] = NULL;
4605 }
4606 }
4607 } else {
4608 zeros.matrix = NULL;
4609 }
4610 return zeros;
4611}
4612
4619 if (m->matrix != NULL) {
4620 for (size_t i = 0; i < m->dim1; i++) {
4621 if (m->matrix[i] != NULL) {
4622 GSC_FREE(m->matrix[i]);
4623 }
4624 }
4625 GSC_FREE(m->matrix);
4626 m->matrix = NULL;
4627 }
4628 m->dim1 = 0;
4629 m->dim2 = 0;
4630}
4631
4632
4633/*--------------------------------Deleting-----------------------------------*/
4634
4649 gsc_AlleleMatrix* m = d->m;
4650 GSC_GLOBALX_T total_deleted = 0;
4651 while (m != NULL) {
4652 GSC_LOCALX_T deleted = 0;
4653 for (GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i) {
4654 if (m->groups[i].num == group_id.num) {
4655 // delete data
4656 if (m->names[i] != NULL) {
4657 GSC_FREE(m->names[i]);
4658 m->names[i] = NULL;
4659 }
4660 if (m->alleles[i] != NULL) {
4661 GSC_FREE(m->alleles[i]);
4662 m->alleles[i] = NULL;
4663 }
4664 m->ids[i] = GSC_NO_PEDIGREE;
4665 m->pedigrees[0][i] = GSC_NO_PEDIGREE;
4666 m->pedigrees[1][i] = GSC_NO_PEDIGREE;
4667 m->groups[i] = GSC_NO_GROUP;
4668 ++deleted;
4669 }
4670 }
4671 m->n_genotypes -= deleted;
4672 total_deleted += deleted;
4673
4674 m = m->next;
4675 }
4676 printf("%lu genotypes were deleted\n", (long unsigned int) total_deleted);
4677 if (total_deleted > 0) {
4679 d->n_groups--;
4680 }
4681}
4682
4691 GSC_ID_T which_ix = gsc_get_index_of_eff_set(d, effID);
4692 if (which_ix == GSC_NA_LOCALX) {
4693 fprintf(stderr, "Nonexistent effect set %lu\n", (long unsigned int) effID.id);
4694 return;
4695 }
4696
4697 if (d->n_eff_sets == 1) {
4699 d->n_eff_sets = 0;
4700 GSC_FREE(d->e);
4702 d->e = NULL;
4703 d->eff_set_ids = NULL;
4704 } else {
4705 d->n_eff_sets--;
4706
4707 gsc_delete_effects_table(d->e + which_ix);
4708 gsc_MarkerEffects* newE = gsc_malloc_wrap(sizeof(*d->e)*d->n_eff_sets,GSC_FALSE);
4709 if (newE == NULL) {
4710 gsc_MarkerEffects cleared = d->e[which_ix];
4711 for (GSC_ID_T i = which_ix; i < d->n_eff_sets-1; ++i) {
4712 d->e[i] = d->e[i+1];
4713 }
4714 d->e[d->n_eff_sets] = cleared;
4715 } else {
4716 memcpy(newE, d->e, sizeof(*d->e)*which_ix);
4717 memcpy(newE + which_ix, d->e + which_ix + 1, sizeof(*d->e)*(d->n_eff_sets - which_ix));
4718 GSC_FREE(d->e);
4719 d->e = newE;
4720 }
4721
4723 if (newIDs == NULL) {
4724 for (GSC_ID_T i = which_ix; i < d->n_eff_sets-1; ++i) {
4725 d->eff_set_ids[i] = d->eff_set_ids[i+1];
4726 }
4728 } else {
4729 memcpy(newIDs, d->eff_set_ids, sizeof(*d->eff_set_ids)*which_ix);
4730 memcpy(newIDs + which_ix, d->eff_set_ids + which_ix + 1, sizeof(*d->eff_set_ids)*(d->n_eff_sets - which_ix));
4732 d->eff_set_ids = newIDs;
4733 }
4734 }
4735}
4736
4744void gsc_delete_label(gsc_SimData* d, const gsc_LabelID which_label) {
4745 GSC_ID_T label_ix;
4746 if (which_label.id == GSC_NO_LABEL.id ||
4747 (label_ix = gsc_get_index_of_label(d, which_label)) == GSC_NA_LOCALX) {
4748 fprintf(stderr, "Nonexistent label %lu\n", (long unsigned int)which_label.id);
4749 return;
4750 }
4751
4752 if (d->n_labels == 1) {
4753 // Delete 'em all
4754 d->n_labels = 0;
4755 GSC_FREE(d->label_ids);
4756 d->label_ids = NULL;
4758 d->label_defaults = NULL;
4759
4760 gsc_AlleleMatrix* m = d->m;
4761 while (m != NULL) {
4762
4763 GSC_FREE(m->labels[0]);
4764 GSC_FREE(m->labels);
4765 m->labels = NULL;
4766
4767 m = m->next;
4768 }
4769
4770 } else {
4771 // Reduce the list of labels in the gsc_SimData
4772 gsc_LabelID* new_label_ids = gsc_malloc_wrap(sizeof(gsc_LabelID) * (d->n_labels - 1),GSC_FALSE);
4773 if (new_label_ids == NULL) {
4774 for (GSC_ID_T i = label_ix; i < d->n_labels - 1; ++i) {
4775 d->label_ids[i] = d->label_ids[i+1];
4776 }
4778 } else {
4779 memcpy(new_label_ids,d->label_ids,sizeof(*d->label_ids)*label_ix);
4780 memcpy(new_label_ids + label_ix,d->label_ids + label_ix + 1, sizeof(*d->label_ids)*(d->n_labels - 1 - label_ix));
4781 GSC_FREE(d->label_ids);
4782 d->label_ids = new_label_ids;
4783 }
4784
4785 int* new_label_defaults = gsc_malloc_wrap(sizeof(int) * (d->n_labels - 1),GSC_FALSE);
4786 if (new_label_defaults == NULL) {
4787 for (GSC_ID_T i = label_ix; i < d->n_labels - 1; ++i) {
4788 d->label_defaults[i] = d->label_defaults[i+1];
4789 }
4790 // no need to overwrite default
4791 } else {
4792 memcpy(new_label_defaults,d->label_defaults,sizeof(*d->label_defaults)*label_ix);
4793 memcpy(new_label_defaults + label_ix,d->label_defaults + label_ix + 1, sizeof(*d->label_defaults)*(d->n_labels - 1 - label_ix));
4795 d->label_defaults = new_label_defaults;
4796 }
4797 d->n_labels --;
4798
4799 // Remove the label from the gsc_AlleleMatrix linked list
4800 gsc_AlleleMatrix* m = d->m;
4801 while (m != NULL) {
4802 GSC_FREE(m->labels[label_ix]);
4803
4804 m->n_labels = d->n_labels;
4805 int** new_label_lookups = gsc_malloc_wrap(sizeof(int*) * (m->n_labels),GSC_FALSE);
4806 if (new_label_lookups == NULL) {
4807 for (GSC_ID_T i = label_ix; i < m->n_labels; ++i) {
4808 m->labels[i] = m->labels[i+1];
4809 }
4810 m->labels[m->n_labels + 1] = NULL;
4811 } else {
4812 memcpy(new_label_lookups, m->labels, sizeof(*m->labels)*label_ix);
4813 memcpy(new_label_lookups + label_ix, m->labels + label_ix + 1, sizeof(*m->labels)*(m->n_labels - label_ix));
4814 GSC_FREE(m->labels);
4815 m->labels = new_label_lookups;
4816 }
4817
4818 m = m->next;
4819 }
4820 }
4821}
4822
4830 if (g->marker_names != NULL) {
4831 for (GSC_GENOLEN_T i = 0; i < g->n_markers; i++) {
4832 if (g->marker_names[i] != NULL) {
4833 GSC_FREE(g->marker_names[i]);
4834 }
4835 }
4837 g->marker_names = NULL;
4838 }
4839 if (g->names_alphabetical != NULL) {
4841 g->names_alphabetical = NULL;
4842 }
4843 g->n_markers = 0;
4844
4845 if (g->map_ids != NULL) {
4846 GSC_FREE(g->map_ids);
4847 g->map_ids = NULL;
4848 }
4849
4850 if (g->maps != NULL) {
4851 for (GSC_ID_T i = 0; i < g->n_maps; ++i) {
4853 }
4854 GSC_FREE(g->maps);
4855 g->maps = NULL;
4856 }
4857 g->n_maps = 0;
4858}
4859
4868 GSC_ID_T map_ix;
4869 if (which_map.id == GSC_NO_LABEL.id || (map_ix = gsc_get_index_of_map(d, which_map)) == GSC_NA_IDX) {
4870 fprintf(stderr, "Nonexistent recombination map %lu\n", (long unsigned int) which_map.id);
4871 return;
4872 }
4873
4874 if (d->genome.n_maps == 1) {
4876 d->genome.map_ids = NULL;
4878 GSC_FREE(d->genome.maps);
4879 d->genome.maps = NULL;
4880 d->genome.n_maps = 0;
4881 } else {
4882 d->genome.n_maps--;
4885 if (tmplist == NULL) {
4886 gsc_RecombinationMap clearedmap = d->genome.maps[map_ix];
4887 for (GSC_ID_T i = map_ix; i < d->genome.n_maps - 1; ++i) {
4888 d->genome.maps[i] = d->genome.maps[i+1];
4889 }
4890 d->genome.maps[d->genome.n_maps] = clearedmap;
4891 } else {
4892 memcpy(tmplist, d->genome.maps, sizeof(*d->genome.maps)*map_ix);
4893 memcpy(tmplist + map_ix, d->genome.maps + map_ix + 1, sizeof(*d->genome.maps)*(d->genome.n_maps - map_ix));
4894 GSC_FREE(d->genome.maps);
4895 d->genome.maps = tmplist;
4896 }
4897
4898 gsc_MapID* tmpids = gsc_malloc_wrap(sizeof(*d->genome.map_ids)*d->genome.n_maps, GSC_FALSE);
4899 if (tmpids == NULL) {
4900 for (GSC_ID_T i = map_ix; i < d->genome.n_maps - 1; ++i) {
4901 d->genome.map_ids[i] = d->genome.map_ids[i+1];
4902 }
4904 } else {
4905 memcpy(tmpids, d->genome.map_ids, sizeof(*d->genome.map_ids)*map_ix);
4906 memcpy(tmpids + map_ix, d->genome.map_ids + map_ix + 1, sizeof(*d->genome.map_ids)*(d->genome.n_maps - map_ix));
4908 d->genome.map_ids = tmpids;
4909 }
4910 }
4911}
4912
4923 if (m->chrs != NULL) {
4924 for (GSC_GENOLEN_T i = 0; i < m->n_chr; ++i) {
4925 switch (m->chrs[i].type) {
4926 case GSC_LINKAGEGROUP_SIMPLE:
4929 m->chrs[i].map.simple.n_markers = 0;
4930 if (m->chrs[i].map.simple.dists != NULL) {
4931 GSC_FREE(m->chrs[i].map.simple.dists);
4932 m->chrs[i].map.simple.dists = NULL;
4933 }
4934 break;
4935 case GSC_LINKAGEGROUP_REORDER:
4937 m->chrs[i].map.reorder.n_markers = 0;
4938 if (m->chrs[i].map.reorder.dists != NULL) {
4939 GSC_FREE(m->chrs[i].map.reorder.dists);
4940 m->chrs[i].map.reorder.dists = NULL;
4941 }
4942 if (m->chrs[i].map.reorder.marker_indexes != NULL) {
4944 m->chrs[i].map.reorder.marker_indexes = NULL;
4945 }
4946 break;
4947 }
4948 }
4949 GSC_FREE(m->chrs);
4950 m->chrs = NULL;
4951 }
4952 if (m->chr_names != NULL) {
4953 for (GSC_GENOLEN_T i = 0; i < m->n_chr; ++i) {
4954 GSC_FREE(m->chr_names[i]);
4955 }
4956 GSC_FREE(m->chr_names);
4957 m->chr_names = NULL;
4958 }
4959 m->n_chr = 0;
4960}
4961
4971 if (m == NULL) {
4972 return;
4973 }
4974 gsc_AlleleMatrix* next;
4975 while (m != NULL) {
4976 /* free the big data matrix */
4977 for (GSC_LOCALX_T i = 0; i < CONTIG_WIDTH; i++) {
4978 if (m->alleles[i] != NULL) {
4979 GSC_FREE(m->alleles[i]);
4980 }
4981
4982 }
4983
4984 // free names
4985 for (GSC_LOCALX_T i = 0; i < CONTIG_WIDTH; i++) {
4986 if (m->names[i] != NULL) {
4987 GSC_FREE(m->names[i]);
4988 }
4989 }
4990
4991 // free labels
4992 for (GSC_ID_T i = 0; i < m->n_labels; ++i) {
4993 if (m->labels[i] != NULL) {
4994 GSC_FREE(m->labels[i]);
4995 }
4996 }
4997 if (m->labels != NULL) {
4998 GSC_FREE(m->labels);
4999 }
5000
5001 next = m->next;
5002 GSC_FREE(m);
5003 m = next;
5004 }
5005}
5006
5012 m->n_markers = 0;
5013 if (m->centre != NULL) {
5014 GSC_FREE(m->centre);
5015 m->centre = NULL;
5016 }
5017 if (m->cumn_alleles != NULL) {
5019 m->cumn_alleles = NULL;
5020 }
5021 if (m->allele != NULL) {
5022 GSC_FREE(m->allele);
5023 m->allele = NULL;
5024 }
5025 if (m->eff != NULL) {
5026 GSC_FREE(m->eff);
5027 m->eff = NULL;
5028 }
5029}
5030
5038 if (m != NULL) {
5039 // clear_simdata does a tiny bit of extra work compared to just freeing everything (by setting to 0)
5040 // but it's minor, and you won't be calling this function in a tight loop anyway
5042 GSC_FREE(m);
5043 }
5044}
5045
5056 for (GSC_ID_T i = 0; i < b->num_blocks; ++i) {
5058 }
5060 b->markers_in_block = NULL;
5062 b->num_markers_in_block = NULL;
5063 b->num_blocks = 0;
5064
5065 return;
5066}
5067
5068
5083 it->am = NULL;
5084 //it->group = GSC_NO_GROUP;
5085 it->localPos = GSC_NA_LOCALX;
5086 it->cachedAM = NULL;
5087 it->cachedAMIndex = UINT_MAX;
5088 it->atEnd = GSC_TRUE;
5089 it->atStart = GSC_TRUE;
5090}
5091
5104 it->d = NULL;
5105 //it->group = GSC_NO_GROUP;
5106 if (it->cacheSize > 0) {
5107 GSC_FREE(it->cache);
5108 }
5109 it->cache = NULL;
5110 it->cacheSize = 0;
5112 it->groupSize = 0;
5113}
5114
5115/*-------------------------------gsc_SimData loaders-----------------------------*/
5116
5122 FILE* fp;
5123 if ((fp = fopen(filename, "r")) == NULL) {
5124 fprintf(stderr, "Failed to open file %s.\n", filename);
5125 }
5126
5127 gsc_TableFileReader tfr = { .fp = fp,
5128 .buf = { 0 },
5129 .buf_fill = 0,
5130 .cursor = 0,
5131 };
5132
5133 if (fp != NULL) {
5134 tfr.buf_fill = fread(tfr.buf,1,sizeof(tfr.buf),fp);
5135 }
5136 return tfr;
5137}
5138
5142 if (tbl->fp != NULL) { fclose(tbl->fp); }
5143 tbl->fp = NULL;
5144}
5145
5154 tbl->cursor = 0;
5155 if (tbl->fp != NULL) {
5156 tbl->buf_fill = fread(tbl->buf,1,sizeof(tbl->buf),tbl->fp);
5157 } else {
5158 tbl->buf_fill = 0;
5159 }
5160}
5161
5168 if (tbl->buf_fill <= tbl->cursor) {
5169 if (tbl->buf_fill < sizeof(tbl->buf)) { // last read did not fill the entire buffer
5171 }
5173 }
5174
5175 switch (tbl->buf[tbl->cursor]) {
5176 case '\r': // allow '\r' or '\r\n' as end of lines. also allow '\n' as end of line (see following case)
5177 case '\n':
5178 return GSC_TABLEFILE_NEWLINE;
5179 case '\t':
5180 case ' ':
5181 case ',':
5183 default:
5185 }
5186}
5187
5197 if (c->cell_len > 0 && c->isCellShallow) {
5198 char* deepcell = gsc_malloc_wrap(sizeof(char)*(c->cell_len+1), GSC_TRUE);
5199 memcpy(deepcell,c->cell,sizeof(char)*c->cell_len);
5200 deepcell[c->cell_len] = '\0';
5201 c->cell = deepcell;
5203 }
5204}
5205
5212 gsc_TableFileCell cur = { .isCellShallow = GSC_TRUE, .cell = NULL, .cell_len = 0,
5213 .predCol = 0, .predNewline = 0, .eof = GSC_FALSE };
5214
5215 GSC_CREATE_BUFFER(tmpcell,char,1);
5216 size_t tmpix = 0;
5217 size_t tblbuf_offset = 0;
5218 size_t tblbuf_len = 0;
5219 int predCarriageReturn = 0; // for detecting /r/n as a single "newline"
5220 _Bool warned = 0;
5221
5222 while (1) {
5224 if (0 < predCarriageReturn) { --predCarriageReturn; } // decremented each time step
5225
5226 if (0 == cur.cell_len) {
5227 switch (type) {
5229 if (tbl->buf[tbl->cursor] == '\r') {
5230 predCarriageReturn = 2; // will have value 1 at next loop iteration, then will fall back to 0
5231 }
5232 if (!(predCarriageReturn && tbl->buf[tbl->cursor] == '\n')) {
5233 ++cur.predNewline;
5234 }
5235 cur.predCol = 0;
5236 ++tbl->cursor;
5237 break;
5238
5240 ++tbl->cursor;
5241 ++cur.predCol;
5242 break;
5243
5245 // just refill as we have no contents we need to save yet
5247 if (0 < predCarriageReturn) { ++predCarriageReturn; } // should not tick down the counter this loop iteration
5248 break;
5249
5251 tblbuf_offset = tbl->cursor; tblbuf_len = 1; // in case we need to make a deep copy later.
5252 cur.cell = tbl->buf + tbl->cursor;
5253 ++cur.cell_len;
5254 ++tbl->cursor;
5255 break;
5256
5257 default:
5258 ++tbl->cursor;
5259 cur.eof = GSC_TRUE;
5260 return cur;
5261 }
5262
5263 } else { // have found the cell, just need to read the rest of it
5264 switch (type) {
5266 ++tbl->cursor;
5267 ++tblbuf_len;
5268 ++cur.cell_len;
5269 break;
5270
5273
5274 if (!warned && tblbuf_len > 8192) {
5275 warned = 1;
5276 fprintf(stderr,"Warning: very long cell identified beginning %c%c%c%c%c%c. Column separators may have failed to be recognised\n",
5277 tmpcell[0],tmpcell[1],tmpcell[2],tmpcell[3],tmpcell[4],tmpcell[5]);
5278 }
5279
5280 GSC_STRETCH_BUFFER(tmpcell,tmpix + tblbuf_len + 1);
5281 memcpy(tmpcell+tmpix,tbl->buf+tblbuf_offset,sizeof(char)*tblbuf_len);
5282 tmpix += tblbuf_len;
5283 tmpcell[tmpix] = '\0';
5284
5285 tblbuf_offset = 0; tblbuf_len = 0;
5287 break;
5288
5290 ++tbl->cursor;
5291 cur.eof = GSC_TRUE;
5292 // fall through
5293 default: // newline or column gap or end of file discovered: save and return.
5294 if (!cur.isCellShallow) {
5295 cur.cell = gsc_malloc_wrap(sizeof(char)*(cur.cell_len + 1),GSC_TRUE);
5296 memcpy(cur.cell,tmpcell,sizeof(char)*tmpix);
5297 if (0 < tblbuf_len) {
5298 memcpy(cur.cell+tmpix,tbl->buf+tblbuf_offset,sizeof(char)*tblbuf_len);
5299 }
5300 cur.cell[cur.cell_len] = '\0';
5301 GSC_DELETE_BUFFER(tmpcell);
5302 }
5303 return cur;
5304 }
5305 }
5306 }
5307}
5308
5318_Bool gsc_get_index_of_genetic_marker(const char* target,
5319 gsc_KnownGenome g,
5320 GSC_GENOLEN_T* out) {
5321 GSC_GENOLEN_T first = 0, last = g.n_markers - 1;
5322 GSC_GENOLEN_T index = (first + last) / 2;
5323 int comparison = strcmp(target,*(g.names_alphabetical[index]));
5324 while (comparison != 0 && first <= last) {
5325 if (comparison == 0) {
5326 if (out != NULL) *out = g.names_alphabetical[index] - g.marker_names;
5327 return 1;
5328 } else if (comparison > 0) {
5329 first = index + 1;
5330 if (first >= g.n_markers) { return 0; }
5331 } else {
5332 if (index == 0) { return 0; }
5333 last = index - 1;
5334 }
5335
5336 // index has been updated, no matter the branch.
5337 index = (first + last) / 2;
5338 comparison = strcmp(target, *(g.names_alphabetical[index]));
5339 }
5340
5341 if (first > last) {
5342 return 0;
5343 }
5344 if (out != NULL) *out = g.names_alphabetical[index] - g.marker_names;
5345 return 1;
5346}
5347
5358 gsc_TableFileCell** queue,
5359 size_t* queuesize) {
5360 gsc_TableFileCell ncell;
5361 if (*queuesize > 0) {
5362 ncell = *queue[0];
5363 /*for (int i = 1; i < *queuesize; ++i) { *queue[i-1] = *queue[i]; }*/
5364 ++*queue;
5365 --*queuesize;
5366 } else {
5368 }
5369 return ncell;
5370}
5371
5372
5393 int min_headerlen,
5394 int max_headerlen,
5395 gsc_TableFileCell* outputq,
5396 size_t* queuesize) {
5397 for (int i = 0; i < max_headerlen + 1; ++i) {
5398 // Read the next cell if it hasn't been read yet
5399 if (*queuesize <= i) {
5400 outputq[i] = gsc_tablefilereader_get_next_cell(tf);
5401 (*queuesize)++;
5402 }
5403
5404 int headerlength = -1;
5405 if (outputq[i].predNewline) { // detected a newline after the previous cell
5406 headerlength = i;
5407 } else if (outputq[i].eof) { // detected end of file at the end of this cell
5408 headerlength = i+1;
5409 }
5410
5411 if (headerlength > 0) {
5412 if (headerlength >= min_headerlen && headerlength <= max_headerlen) {
5413 return headerlength;
5414 } else {
5415 return GSC_NA;
5416 }
5417
5418 }
5419
5420 }
5421 return GSC_NA;
5422}
5423
5424
5454 gsc_TableFileCell* unprocessedqueue,
5455 int ncell_required,
5456 const char** titles_required,
5457 int ncell_optional,
5458 const char** titles_optional,
5459 int* col_order) {
5460 // Check ordering of titles.
5461 // Step 1: Initialise
5462 int ncell_total = ncell_required + ncell_optional;
5463 for (int i = 0; i < ncell_total; ++i) {
5464 col_order[i] = i;
5465 }
5466
5467 // Step 2: Check and save actual ordering of titles
5468 int title_ix = 0;
5469 for (; title_ix < ncell_required; ++title_ix) {
5470 // While we're checking required titles, we return FALSE/0 if we fail to find a match
5471 // for ANY required title inside the first row cells.
5472 // In searching through the first row cells, we should search through all indexes in
5473 // col_order that are same position as title_ix or further along in the shuffled array.
5474 _Bool found_match = 0;
5475 size_t title_len = strlen(titles_required[title_ix]);
5476
5477 for (int header_ix = title_ix; header_ix < ncellrow1; ++header_ix) {
5478 int header_queueix = col_order[header_ix];
5479 // If this cell number has the right title, swap its index to corresponding place
5480 if (unprocessedqueue[header_queueix].cell_len == title_len &&
5481 strncmp(unprocessedqueue[header_queueix].cell,titles_required[title_ix],title_len) == 0) {
5482 if (header_ix != title_ix) {
5483 col_order[header_ix] = col_order[title_ix];
5484 col_order[title_ix] = header_queueix;
5485 }
5486 found_match = 1;
5487 break;
5488 }
5489 }
5490 if (!found_match) { return GSC_FALSE; } // seems this isn't a header, we can't find required titles
5491 }
5492 int matches = ncell_required;
5493 for (; title_ix < ncell_total; ++title_ix) {
5494 // While we're checking optional titles, we set the col_order value of titles that have no
5495 // match in the first row cells to -1, but continue searching rather than returning immediately.
5496 // In searching through the first row cells, we search all indexes in the shuffled col_order
5497 // array that are greater than or equal to the number of titles matched so far.
5498 _Bool found_match = 0;
5499 int title_ix_o = title_ix - ncell_required;
5500 size_t title_len = strlen(titles_optional[title_ix_o]);
5501
5502 for (int header_ix = matches; header_ix < ncellrow1; ++header_ix) {
5503 int header_queueix = col_order[header_ix];
5504 // If this cell number has the right title, swap its index to corresponding place
5505 if (unprocessedqueue[header_queueix].cell_len == title_len &&
5506 strncmp(unprocessedqueue[header_queueix].cell,titles_optional[title_ix_o],title_len) == 0) {
5507 if (header_ix != title_ix) {
5508 col_order[header_ix] = col_order[title_ix];
5509 col_order[title_ix] = header_queueix;
5510 }
5511 found_match = 1;
5512 ++matches;
5513 break;
5514 }
5515 }
5516 if (!found_match) { col_order[title_ix] = -1; }
5517 }
5518 return GSC_TRUE;
5519}
5520
5521
/**
 * Parse a three-column genetic map file (marker name, chromosome, position)
 * into an array of gsc_MapfileUnit.
 *
 * The first row is inspected to decide whether it is a header: if it has
 * exactly 3 cells and they match the titles "marker", "chr" and "pos" (in any
 * order), the file is read with the column order given by that header.
 * If it has 3 cells that do not match, the file is read as headerless with
 * columns in the order marker, chr, pos. Otherwise parsing is abandoned.
 *
 * A row is kept only if every cell in it is non-empty, falls in one of the
 * three expected columns, and its position cell is fully consumed by strtod.
 *
 * NOTE(review): several lines of this function are missing from this listing
 * (including the declarations of `tf`, `buffer` and `buffercap`, and whatever
 * happens immediately before the early `return 0` and the final buffer
 * hand-over). Comments below describe only the visible statements.
 *
 * @param filename path of the map file. If NULL, returns 0 immediately.
 * @param out receives the parsed rows through
 * GSC_FINALISE_BUFFER(buffer,*out,goodrow_counter). That macro is defined
 * elsewhere; presumably it hands ownership of the array to the caller --
 * TODO confirm. Not written on the early-return paths visible here.
 * @return the number of rows successfully parsed and stored, or 0 if
 * @a filename is NULL or the 3 expected columns cannot be identified.
 */
5555static size_t gsc_helper_parse_mapfile(const char* filename, struct gsc_MapfileUnit** out) {
5556 if (filename == NULL) return 0;
5557
5559
// Current 1-based position in the table, used to match cells to columns
// and to report how many lines failed to parse.
5560 size_t row = 1;
5561 size_t col = 1;
5562
// Cells of the first row are read ahead into this small queue so they can be
// tested as a header and, if they turn out to be data, replayed to the main
// loop. The first-row reader is called with max length 3 and may read one
// cell beyond that, hence 4 slots.
5563 gsc_TableFileCell cellsread[4] = { 0 };
5564 gsc_TableFileCell* cellqueue = cellsread;
5565 size_t queue_size = 0;
5566 int row1len = gsc_helper_read_first_row(&tf, 3, 3, cellqueue, &queue_size);
5567
5568 const char* titles[3] = { "marker", "chr", "pos"};
5569 int colnums[3];
5570 int marker_colnum, chr_colnum, pos_colnum;
// header is GSC_TRUE if the first row matches all three titles, GSC_FALSE if
// it has 3 cells that are not the titles, and GSC_NA if it does not have
// exactly 3 cells.
5571 GSC_LOGICVAL header = (row1len == 3) ? gsc_helper_parse_ncell_header(row1len, cellqueue, 3, titles, 0, NULL, colnums) : GSC_NA;
5572 if (header == GSC_TRUE) {
5573 printf("(Loading %s) Format: map file with header\n", filename);
// colnums holds 0-based positions of each title; col is 1-based.
5574 marker_colnum = colnums[0] + 1, chr_colnum = colnums[1] + 1, pos_colnum = colnums[2] + 1;
5575 } else if (header == GSC_FALSE) {
5576 printf("(Loading %s) Format: map file without header\n", filename);
5577 marker_colnum = 1, chr_colnum = 2, pos_colnum = 3;
5578 } else {
5579 printf("(Loading %s) Failure: Cannot identify the expected 3 columns of the map file\n", filename);
// Release any read-ahead cells that own their own text before bailing out.
// NOTE(review): `i` is int while queue_size is size_t (signed/unsigned comparison).
5580 for (int i = 0; i < queue_size; ++i) {
5581 if (!cellqueue[i].isCellShallow) { GSC_FREE(cellqueue[i].cell); }
5582 }
5584 return 0;
5585 }
5586
// A header row carries no data: drop its three cells from the queue and free
// any that own their text. Only GSC_TRUE and GSC_FALSE reach this point.
5587 if (header) {
5588 cellqueue += 3;
5589 queue_size -= 3;
5590 if (!cellsread[0].isCellShallow) { GSC_FREE(cellsread[0].cell); }
5591 if (!cellsread[1].isCellShallow) { GSC_FREE(cellsread[1].cell); }
5592 if (!cellsread[2].isCellShallow) { GSC_FREE(cellsread[2].cell); }
5593 }
5594 _Bool goodrow = (header) ? 0 : 1; // discard first row if it's a header, keep if it's not.
5595 size_t goodrow_counter = 0;
5596
// Values collected for the row currently being read.
5597 char* marker = NULL;
5598 char* chr = NULL;
5599 double pos = 0;
5600 char* conversionflag;
5601
5603
// Main loop: consume cells one at a time (queued first-row cells first, then
// the file) until the reader reports end of file.
5604 gsc_TableFileCell ncell;
5605 do {
5606 ncell = gsc_helper_tablefilereader_get_next_cell_wqueue(&tf, &cellqueue, &queue_size);
5607
5608 // Update row/col position and save predecessor row
5609 if (ncell.cell != NULL) {
// predNewline > 0 means this cell starts a new row, so the previous row is
// now complete: store it if it was valid, otherwise discard its strings.
5610 if (ncell.predNewline) {
5611 if (goodrow) { // save predecessor row
5612 buffer[goodrow_counter].name = marker;
5613 buffer[goodrow_counter].chr = chr;
5614 buffer[goodrow_counter].pos = pos;
5615
5616 ++goodrow_counter;
// Grow the output buffer once it is full.
5617 if (goodrow_counter >= buffercap) {
5618 GSC_STRETCH_BUFFER(buffer,2*row);
5619 }
// The stored row now holds these pointers; forget them locally.
5620 marker = NULL;
5621 chr = NULL;
5622 } else {
// NOTE(review): marker and chr are freed here but not reset to NULL in the
// visible lines; they are expected to be overwritten by the next row's cells.
5623 if (marker != NULL) {
5624 GSC_FREE(marker);
5625 }
5626 if (chr != NULL) {
5627 GSC_FREE(chr);
5628 }
5629 }
5630 row += ncell.predNewline;
5631 goodrow = 1;
5632 col = 1;
5633 }
// Any run of column separators before the cell advances by exactly one column.
5634 col += (ncell.predCol > 0) ? 1 : 0;
5635
5636 // Parse this cell
// NOTE(review): the `if` that follows the closing brace on the second line
// below is not an `else if`, so an empty cell marks the row bad and then
// still goes through the column dispatch.
5637 if (ncell.cell_len == 0) {
5638 goodrow = 0;
5639 } if (col == marker_colnum) {
// Take over the cell's text as this row's marker name; clearing ncell.cell
// stops the cleanup at the bottom of the loop from freeing it.
// NOTE(review): a line is missing from the listing just before this statement.
5641 marker = ncell.cell; ncell.cell = NULL;
5642 //ncell.isCellShallow = GSC_TRUE; // so it isn't freed.
5643
5644 } else if (col == chr_colnum) {
// Same hand-over for the chromosome name (kept as a string).
// NOTE(review): a line is missing from the listing just before this statement.
5646 chr = ncell.cell; ncell.cell = NULL;
5647 //ncell.isCellShallow = GSC_TRUE; // so it isn't freed
5648 /*char tmp = ncell.cell[ncell.cell_len]; ncell.cell[ncell.cell_len] = '\0';
5649 chr = strtoul(ncell.cell,&conversionflag,36);
5650 ncell.cell[ncell.cell_len] = tmp;
5651 if (conversionflag != ncell.cell + ncell.cell_len) { // unsuccessful read
5652 //fprintf(stderr,"Entry at row %i column %i of file %s could not be parsed as an integer or alphanumeric string\n", row, chr_colnum, filename);
5653 goodrow = 0;
5654 }*/
5655
5656 } else if (col == pos_colnum) {
// Temporarily NUL-terminate the cell in place so strtod stops at its end,
// then restore the overwritten character.
5657 char tmp = ncell.cell[ncell.cell_len]; ncell.cell[ncell.cell_len] = '\0';
5658 pos = strtod(ncell.cell,&conversionflag);
5659 ncell.cell[ncell.cell_len] = tmp;
// The row is only valid if strtod consumed the whole cell.
5660 if (conversionflag != ncell.cell + ncell.cell_len) { // unsuccessful read
5661 goodrow = 0;
5662 //fprintf(stderr,"Entry at row %i column %i of file %s could not be parsed as a numeric value\n", row, pos_colnum, filename);
5663 }
5664
5665 } else {
// A cell outside the three expected columns invalidates the row.
5666 goodrow = 0;
5667 }
5668
5669 // Reset to get next cell.
5670 if (!ncell.isCellShallow && ncell.cell != NULL) { GSC_FREE(ncell.cell); }
5671 }
5672 } while (!ncell.eof);
5673
// The last row has no following cell to trigger the save above, so handle it
// here. It is only considered if reading stopped on the third column.
// NOTE(review): in the visible lines, when col != 3 any pending marker/chr
// strings are neither stored nor freed, and the buffer is not stretched
// before this final write -- confirm against the full source.
5674 if (col == 3) {
5675 if (goodrow) { // save predecessor row
5676 buffer[goodrow_counter].name = marker;
5677 buffer[goodrow_counter].chr = chr;
5678 buffer[goodrow_counter].pos = pos;
5679
5680 ++goodrow_counter;
5681 marker = NULL;
5682 } else {
5683 if (marker != NULL) {
5684 GSC_FREE(marker);
5685 }
5686 if (chr != NULL) {
5687 GSC_FREE(chr);
5688 }
5689 }
5690 }
5691 //row -= ncell.predNewline; // don't count trailing newlines in stats.
5692
// Failed-line count = rows seen, minus the header row (header used as 0/1 --
// assumes GSC_TRUE has the value 1, TODO confirm), minus rows stored.
5693 printf("(Loading %s) %u marker(s) with map positions were loaded. Failed to parse %u line(s).\n", filename, (unsigned int) goodrow_counter, (unsigned int) (row - header - goodrow_counter));
5695
5696 // Check outputs. We don't delete the buffers because we want to leave them alive with our callers holding the handles.
5697 GSC_FINALISE_BUFFER(buffer,*out,goodrow_counter);
5698 return goodrow_counter;
5699}
5700
5701
5719 GSC_GENOLEN_T n_markers_in_list,
5720 struct gsc_MapfileUnit** markerlist) {
5721 struct gsc_MapfileUnit* rlist = *markerlist;
5722 GSC_GENOLEN_T n_joined = 0;
5723 /*size_t consecutivity_bias; // we cache the index of the last name we found and pre-check whether the next marker
5724 // in the list is the next marker in the genome. For the case where people organise their genotype file and genetic
5725 // map file in the same order. Edit: decided this is not likely enough a situation to build this in.*/
5726
5727 for (GSC_GENOLEN_T i = 0; i < n_markers_in_list; ++i) {
5728 if (rlist[i].name != NULL) {
5729 GSC_GENOLEN_T nameix;
5730 if (gsc_get_index_of_genetic_marker(rlist[i].name, g, &nameix)) {
5731 if (n_joined != i) {
5732 rlist[n_joined] = rlist[i];
5733 }
5734 n_joined++;
5735
5736 } else { // discard this marker. n_joined lags behind i by one more step.
5737 GSC_FREE(rlist[i].name);
5738 GSC_FREE(rlist[i].chr);
5739 }
5740 }
5741 }
5742
5743 return n_joined;
5744}
5745
5746
5750 GSC_ID_T newmapindex = 0;
5751 if (d->genome.n_maps > 0) {
5752 newmapindex = d->genome.n_maps;
5753
5754 gsc_MapID* tmpMapIDs = gsc_malloc_wrap(sizeof(gsc_MapID)*(newmapindex+1),GSC_TRUE);
5755 memcpy(tmpMapIDs,d->genome.map_ids,sizeof(gsc_MapID)*newmapindex);
5757 d->genome.map_ids = tmpMapIDs;
5758
5759 gsc_RecombinationMap* tmpMaps = gsc_malloc_wrap(sizeof(gsc_RecombinationMap)*(newmapindex+1),GSC_TRUE);
5760 memcpy(tmpMaps,d->genome.maps,sizeof(gsc_RecombinationMap)*newmapindex);
5761 GSC_FREE(d->genome.maps);
5762 d->genome.maps = tmpMaps;
5763
5764 } else {
5767 }
5768 d->genome.map_ids[newmapindex] = gsc_get_new_map_id(d);
5769 d->genome.n_maps++;
5770 d->genome.maps[newmapindex] = map;
5771
5772 return d->genome.map_ids[newmapindex];
5773}
5774
5778 GSC_ID_T neweffsetindex = 0;
5779 if (d->n_eff_sets > 0) {
5780 neweffsetindex = d->n_eff_sets;
5781
5782 gsc_EffectID* tmpIDs = gsc_malloc_wrap(sizeof(gsc_EffectID)*(neweffsetindex+1),GSC_TRUE);
5783 memcpy(tmpIDs,d->eff_set_ids,sizeof(gsc_EffectID)*neweffsetindex);
5785 d->eff_set_ids = tmpIDs;
5786
5787 gsc_MarkerEffects* tmpMats = gsc_malloc_wrap(sizeof(*tmpMats)*(neweffsetindex+1),GSC_TRUE);
5788 memcpy(tmpMats,d->e,sizeof(*tmpMats)*neweffsetindex);
5789 GSC_FREE(d->e);
5790 d->e = tmpMats;
5791
5792 } else {
5795 }
5796 d->eff_set_ids[neweffsetindex] = gsc_get_new_eff_set_id(d);
5797 d->n_eff_sets++;
5798 d->e[neweffsetindex] = effset;
5799
5800 return d->eff_set_ids[neweffsetindex];
5801}
5802
5809static void gsc_helper_sort_markerlist(GSC_GENOLEN_T n_markers, struct gsc_MapfileUnit* markerlist) {
5810 if (n_markers < 2) { return; }
5811
5812 // sort by linkage group
5813 qsort(markerlist,n_markers,sizeof(*markerlist),gsc_helper_mapfileunit_ascending_chr_comparer);
5814
5815 // sort each linkage group by pos
5816 //int n_chr = 1;
5817 GSC_GENOLEN_T chr_start = 0;
5818 char* current_chr = markerlist[0].chr;
5819
5820 for (GSC_GENOLEN_T i = 1; i < n_markers; ++i) {
5821 if (strcmp(markerlist[i].chr, current_chr) != 0) { // found end of current chr
5822 //n_chr++;
5823 qsort(markerlist + chr_start, i - chr_start,
5824 sizeof(*markerlist), gsc_helper_mapfileunit_ascending_d_comparer);
5825
5826 chr_start = i;
5827 current_chr = markerlist[i].chr;
5828 }
5829 }
5830
5831 qsort(markerlist + chr_start, n_markers - chr_start,
5832 sizeof(*markerlist), gsc_helper_mapfileunit_ascending_d_comparer);
5833 //return n_chr;
5834}
5835
5850 GSC_GENOLEN_T n_markers,
5851 struct gsc_MapfileUnit* markerlist) {
5852 if (n_markers == 0) return NO_MAP;
5853
5854 GSC_CREATE_BUFFER(chr_nmembers,GSC_GENOLEN_T,40);
5855 memset(chr_nmembers,0,sizeof(*chr_nmembers)*40);
5856 GSC_CREATE_BUFFER(chr_ids,char*,40);
5857 chr_nmembers[0] = 1;
5858 GSC_GENOLEN_T n_chr = 1;
5859 chr_ids[n_chr-1] = markerlist[0].chr;
5860 markerlist[0].chr = NULL; // so that it will not be freed
5861 for (GSC_GENOLEN_T i = 1; i < n_markers; ++i) {
5862 while (i < n_markers && markerlist[i].name == NULL) {
5863 ++i;
5864 }
5865 if (strcmp(chr_ids[n_chr-1], markerlist[i].chr) != 0) {
5866 // First of next
5867 if (n_chr >= chr_nmemberscap) {
5868 GSC_STRETCH_BUFFER(chr_ids,2*n_chr);
5869 GSC_STRETCH_BUFFER(chr_nmembers,2*n_chr);
5870 memset(chr_nmembers+n_chr,0,sizeof(*chr_nmembers)*n_chr);
5871 }
5872 ++n_chr;
5873 chr_ids[n_chr-1] = markerlist[i].chr;
5874 markerlist[i].chr = NULL;
5875 chr_nmembers[n_chr-1] = 1;
5876 } else {
5877 ++(chr_nmembers[n_chr-1]);
5878 }
5879 }
5880
5881 gsc_RecombinationMap map = {.n_chr=n_chr,
5882 .chr_names=NULL,
5883 .chrs=gsc_malloc_wrap(sizeof(gsc_LinkageGroup) * n_chr, GSC_TRUE) };
5884 GSC_FINALISE_BUFFER(chr_ids,map.chr_names,n_chr);
5885
5886 // Populate the map. Each chr/linkage group may be "Simple" or "Reordered"
5887 GSC_GENOLEN_T could_not_match = 0;
5888 GSC_GENOLEN_T current_marker = 0;
5889 GSC_GENOLEN_T first_marker;
5890 GSC_GENOLEN_T n_bad_chr = 0;
5891 GSC_GENOLEN_T n_sparse_chr = 0;
5892 for (GSC_GENOLEN_T chr_ix = 0; chr_ix < map.n_chr; ++chr_ix) {
5893 first_marker = current_marker;
5894 double chrdist = markerlist[first_marker + chr_nmembers[chr_ix] - 1].pos - markerlist[first_marker].pos;
5895 double* lgdists = gsc_malloc_wrap(sizeof(double)*(chr_nmembers[chr_ix]),GSC_TRUE);
5896
5897 char found_first = GSC_FALSE;
5898 // n_goodmembers == 0 is a guard on firsts_coord_in_genome, but we
5899 // still initialise it here (to a value too high to be reasonable)
5900 // because the compiler can't tell that.
5901 GSC_GENOLEN_T firsts_coord_in_genome = d->genome.n_markers;
5902 GSC_GENOLEN_T n_goodmembers = 0;
5903 GSC_GENOLEN_T* marker_coords = NULL;
5904
5905 GSC_GENOLEN_T endpt = first_marker + chr_nmembers[chr_ix];
5906 for (; current_marker < endpt; ++current_marker) { // simple recombination map, if possible
5907 if (markerlist[current_marker].name == NULL) {
5908 continue;
5909 }
5910
5911 if (!found_first) {
5912 GSC_GENOLEN_T coord;
5913 if (!gsc_get_index_of_genetic_marker(markerlist[current_marker].name, d->genome, &coord)) {
5914 could_not_match++;
5915 } else {
5916 found_first = GSC_TRUE;
5917 first_marker = current_marker;
5918 firsts_coord_in_genome = coord;
5919 lgdists[n_goodmembers] = (markerlist[current_marker].pos - markerlist[first_marker].pos) / chrdist;
5920 n_goodmembers++;
5921 }
5922 } else if (firsts_coord_in_genome + n_goodmembers < d->genome.n_markers &&
5923 strcmp(markerlist[current_marker].name, d->genome.marker_names[firsts_coord_in_genome + n_goodmembers]) == 0) {
5924 // we are a simple linkage group still so far.
5925 lgdists[n_goodmembers] = (markerlist[current_marker].pos - markerlist[first_marker].pos) / chrdist;
5926 n_goodmembers++;
5927 } else {
5928 // Just discovered we are a reordered linkage group. Copy over the marker indexes that were as expected.
5929 marker_coords = gsc_malloc_wrap(sizeof(*marker_coords)*(chr_nmembers[chr_ix]),GSC_TRUE);
5930 for (GSC_GENOLEN_T backfill = 0; backfill < n_goodmembers; ++backfill) {
5931 marker_coords[backfill] = firsts_coord_in_genome + backfill;
5932 }
5933 break;
5934 }
5935
5936 }
5937 for (; current_marker < endpt; ++current_marker) { // reordered recombination map, if previous failed.
5938 if (markerlist[current_marker].name == NULL) {
5939 continue;
5940 }
5941
5942 GSC_GENOLEN_T coord;
5943 if (!gsc_get_index_of_genetic_marker(markerlist[current_marker].name, d->genome, &coord)) {
5944 ++could_not_match;
5945 } else {
5946 marker_coords[n_goodmembers] = coord;
5947 lgdists[n_goodmembers] = (markerlist[current_marker].pos - markerlist[first_marker].pos) / chrdist;
5948 ++n_goodmembers;
5949 }
5950 }
5951
5952 if (n_goodmembers == 0) { // || firsts_coord_in_genome >= d->genome.n_markers) {
5953 n_bad_chr++;
5954 } else if (marker_coords == NULL) {
5955 GSC_GENOLEN_T chr_ix_actual = chr_ix-n_bad_chr;
5956 map.chrs[chr_ix_actual].type = GSC_LINKAGEGROUP_SIMPLE;
5957 map.chrs[chr_ix_actual].map.simple.expected_n_crossovers = chrdist / 100;
5958 map.chrs[chr_ix_actual].map.simple.n_markers = n_goodmembers;
5959 map.chrs[chr_ix_actual].map.simple.first_marker_index = firsts_coord_in_genome;
5960 map.chrs[chr_ix_actual].map.simple.dists = lgdists;
5961 } else {
5962 GSC_GENOLEN_T chr_ix_actual = chr_ix-n_bad_chr;
5963 map.chrs[chr_ix_actual].type = GSC_LINKAGEGROUP_REORDER;
5964 map.chrs[chr_ix_actual].map.reorder.expected_n_crossovers = chrdist / 100;
5965 map.chrs[chr_ix_actual].map.reorder.n_markers = n_goodmembers;
5966 map.chrs[chr_ix_actual].map.reorder.marker_indexes = marker_coords;
5967 map.chrs[chr_ix_actual].map.reorder.dists = lgdists;
5968 }
5969 if (chrdist >= 5000*n_goodmembers) { ++n_sparse_chr; }
5970 }
5971 GSC_DELETE_BUFFER(chr_nmembers);
5972 map.n_chr = map.n_chr-n_bad_chr;
5973 if (map.n_chr == 0) {
5974 GSC_FREE(map.chrs);
5975 return NO_MAP;
5976 }
5977 if (n_sparse_chr > 0) {
5978 fprintf(stderr,"%d of this map's chromosomes are very sparse (averaging less than 1 marker "
5979 "per 5 Morgans of distance). If the map is not expected to be this sparse, check that "
5980 "positions in the map file are in centimorgans, not base pairs.\n", n_sparse_chr);
5981 }
5983}
5984
5985
6002 GSC_GENOLEN_T n_markers,
6003 char** markernames,
6004 double expected_n_recombinations) {
6005 if (d->genome.n_markers == 0) {
6006 fprintf(stderr, "Cannot create a recombination map if there is no genome\n");
6007 return NO_MAP;
6008 }
6009
6010 gsc_RecombinationMap map = {.n_chr=1, .chr_names=NULL, .chrs=gsc_malloc_wrap(sizeof(gsc_LinkageGroup)*1, GSC_TRUE) };
6011
6012 if (markernames == NULL) {
6013 double* lgdists = gsc_malloc_wrap(sizeof(double)*d->genome.n_markers,GSC_TRUE);
6014 double lgdist = 1./(d->genome.n_markers-1);
6015 lgdists[0] = 0;
6016 for (GSC_GENOLEN_T i = 1; i < d->genome.n_markers; ++i) { lgdists[i] = lgdists[i-1] + lgdist; }
6017
6018 map.chrs[0].type = GSC_LINKAGEGROUP_SIMPLE;
6019 map.chrs[0].map.simple.expected_n_crossovers = expected_n_recombinations;
6021 map.chrs[0].map.simple.first_marker_index = 0;
6022 map.chrs[0].map.simple.dists = lgdists;
6023 } else {
6024 if (n_markers == 0) return NO_MAP;
6025
6026 // markernames could still be simple or reordered compared to the d->genome, so need to check that first.
6027 _Bool found_first = 0;
6028 GSC_GENOLEN_T could_not_match = 0;
6029 GSC_GENOLEN_T firsts_coord_in_genome = d->genome.n_markers;
6030 GSC_GENOLEN_T chrmarker_ix = 0;
6031
6032 GSC_GENOLEN_T* marker_coords = NULL;
6033 for (GSC_GENOLEN_T i = 0; i < n_markers; ++i) {
6034 if (!found_first || marker_coords != NULL) {
6035 // We are first or we are a reordered linkage group. Find what index in the genome the next marker is stored at.
6036 GSC_GENOLEN_T coord;
6037
6038 if (markernames[i] == NULL) {
6039 could_not_match++;
6040 } else if (!gsc_get_index_of_genetic_marker(markernames[i], d->genome, &coord )) {
6041 could_not_match++;
6042 } else if (!found_first) {
6043 found_first = 1;
6044 firsts_coord_in_genome = coord;
6045 chrmarker_ix++;
6046 } else { // must be the case that we have marker_coords != NULL and are a reordered linkage group
6047 marker_coords[chrmarker_ix] = coord;
6048 chrmarker_ix++;
6049 }
6050
6051 } else if (firsts_coord_in_genome < d->genome.n_markers &&
6052 strcmp(markernames[i], d->genome.marker_names[firsts_coord_in_genome + i]) == 0) {
6053 // are a simple linkage group still so far.
6054 chrmarker_ix++;
6055
6056 } else {
6057 // Just discovered we are a reordered linkage group. Copy over the marker indexes that were as expected.
6058 marker_coords = gsc_malloc_wrap(sizeof(*marker_coords)*n_markers,GSC_TRUE);
6059 for (GSC_GENOLEN_T backfill = 0; backfill < chrmarker_ix; ++backfill) {
6060 marker_coords[backfill] = firsts_coord_in_genome + backfill;
6061 }
6062
6063 if (markernames[i] == NULL) {
6064 could_not_match++;
6065 } else if (!gsc_get_index_of_genetic_marker(markernames[i], d->genome, &(marker_coords[chrmarker_ix]) )) {
6066 could_not_match++;
6067 } else {
6068 chrmarker_ix++;
6069 }
6070 }
6071 }
6072
6073 double* lgdists = gsc_malloc_wrap(sizeof(double)*chrmarker_ix,GSC_TRUE);
6074 double lgdist = 1./(chrmarker_ix-1);
6075 lgdists[0] = 0;
6076 for (GSC_GENOLEN_T i = 1; i < chrmarker_ix; ++i) { lgdists[i] = lgdists[i-1] + lgdist; }
6077
6078 if (marker_coords == NULL) {
6079 map.chrs[0].type = GSC_LINKAGEGROUP_SIMPLE;
6080 map.chrs[0].map.simple.expected_n_crossovers = expected_n_recombinations;
6081 map.chrs[0].map.simple.n_markers = chrmarker_ix;
6082 map.chrs[0].map.simple.first_marker_index = firsts_coord_in_genome;
6083 map.chrs[0].map.simple.dists = lgdists;
6084 } else {
6085 map.chrs[0].type = GSC_LINKAGEGROUP_REORDER;
6086 map.chrs[0].map.reorder.expected_n_crossovers = expected_n_recombinations;
6087 map.chrs[0].map.reorder.n_markers = chrmarker_ix;
6088 map.chrs[0].map.reorder.marker_indexes = marker_coords;
6089 map.chrs[0].map.reorder.dists = lgdists;
6090 }
6091
6092 if (could_not_match > 0) {
6093 fprintf(stderr, "%d of the marker names do not appear in the genome\n", could_not_match);
6094 }
6095
6096 }
6097
6099}
6100
6101
/* NOTE(review): the signature line(s) of this function are not part of this
 * listing. The body reads `d`, `n_markers` and `markernames`, which is
 * consistent with the call gsc_create_unlinked_recombmap(d, 0, NULL) made by
 * the genotype-matrix loader in this file -- confirm against the header.
 *
 * Builds a recombination map in which every marker sits alone in its own
 * single-marker linkage group of type GSC_LINKAGEGROUP_SIMPLE.
 *  - markernames == NULL: one group per marker tracked in d->genome
 *    (n_markers is overwritten with d->genome.n_markers).
 *  - otherwise: one group per entry of markernames that is non-NULL and is
 *    found in d->genome; the remaining entries are counted and reported on
 *    stderr.
 * Returns NO_MAP when d->genome tracks no markers.
 * NOTE(review): the statements that set expected_n_crossovers and that return
 * the finished map are not visible in this listing.
 */
6119 if (d->genome.n_markers == 0) {
6120 fprintf(stderr, "Cannot create a recombination map if there is no genome\n");
6121 return NO_MAP;
6122 }
6123
6124 if (markernames == NULL) {
// No names supplied: cover every marker the genome tracks, in genome order.
6125 n_markers = d->genome.n_markers;
6126 //markernames = d->genome.marker_names;
6127
6128 gsc_RecombinationMap map = {.n_chr=n_markers, .chr_names=NULL,
6129 .chrs=gsc_malloc_wrap(sizeof(gsc_LinkageGroup)*n_markers, GSC_TRUE) };
6130
// Linkage group i holds exactly the marker at genome index i.
6131 for (GSC_GENOLEN_T i = 0; i < n_markers; ++i) {
6132 map.chrs[i].type = GSC_LINKAGEGROUP_SIMPLE;
6133 map.chrs[i].map.simple.n_markers = 1;
6134 map.chrs[i].map.simple.first_marker_index = i;
6135
6136 // Lines below are dependent on pulling crossover counts from Poisson dist in generate_gamete,
6137 // so that .expected_n_crossovers = 0 means .dists will never be accessed
6139 map.chrs[i].map.simple.dists = NULL;
6140 }
6142
6143 } else { // we've been given a list of markers. Our task is slightly more complex.
6144
6145 // First find all marker name indexes:
// m_ix is filled compactly: the lookup writes into slot n_good, and n_good only
// advances when the lookup succeeds, so failed lookups leave no gaps.
6146 GSC_CREATE_BUFFER(m_ix, GSC_GENOLEN_T, n_markers);
6147 GSC_GENOLEN_T n_good = 0;
6148 GSC_GENOLEN_T could_not_match = 0;
6149 for (GSC_GENOLEN_T m = 0; m < n_markers; ++m) {
6150 if (markernames[m] == NULL) {
6151 could_not_match++;
6152 } else if (!gsc_get_index_of_genetic_marker(markernames[m], d->genome, &m_ix[n_good] )) {
6153 could_not_match++;
6154 } else {
6155 ++n_good;
6156 }
6157 }
6158 if (could_not_match > 0) {
6159 fprintf(stderr, "%d of the marker names do not appear in the genome\n", could_not_match);
6160 }
6161
6162 // Then create and populate the map
// Only the n_good matched markers get a linkage group.
6163 gsc_RecombinationMap map = {.n_chr=n_good,
6164 .chrs=gsc_malloc_wrap(sizeof(gsc_LinkageGroup)*n_good, GSC_TRUE) };
6165 for (GSC_GENOLEN_T i = 0; i < n_good; ++i) {
6166 map.chrs[i].type = GSC_LINKAGEGROUP_SIMPLE;
6167 map.chrs[i].map.simple.n_markers = 1;
6168 map.chrs[i].map.simple.first_marker_index = m_ix[i];
6169
6170 // Lines below are dependent on pulling crossover counts from Poisson dist in generate_gamete,
6171 // so that .expected_n_crossovers = 0 means .dists will never be accessed
6173 map.chrs[i].map.simple.dists = NULL;
6174 }
6175
6176 GSC_DELETE_BUFFER(m_ix);
6178 }
6179}
6180
6181
6219gsc_MapID gsc_load_mapfile(SimData* d, const char* filename) {
6220 if (filename == NULL) return NO_MAP;
6221
6222 struct gsc_MapfileUnit* mapcontents = NULL;
6223 size_t nrows = gsc_helper_parse_mapfile(filename,&mapcontents);
6224 if (nrows == 0 || mapcontents == NULL) {
6225 if (mapcontents != NULL) {
6226 GSC_FREE(mapcontents);
6227 }
6228 return NO_MAP;
6229 }
6230
6231 _Bool freeMapNames = 1;
6232 if (d->genome.n_markers > 0) {
6233 // if genome is already set, leftjoin on those markers.
6234 GSC_GENOLEN_T new_nrows = gsc_helper_str_markerlist_leftjoin(d->genome, nrows, &mapcontents);
6235 if (new_nrows < nrows) {
6236 printf("Discarded %lu markers when loading map %s because they do not appear in the primary map.\n", (long unsigned int) (nrows - new_nrows), filename);
6237 }
6238 nrows = new_nrows;
6239 gsc_helper_sort_markerlist(nrows,mapcontents);
6240 } else {
6241 // else set up the list of markers tracked by the simulation
6242 gsc_helper_sort_markerlist(nrows,mapcontents);
6243 d->genome = (gsc_KnownGenome){
6244 .n_markers = nrows,
6245 .marker_names = gsc_malloc_wrap(sizeof(char**)*nrows,GSC_TRUE),
6246 .names_alphabetical = gsc_malloc_wrap(sizeof(char**)*nrows,GSC_TRUE),
6247 .n_maps = 0,
6248 .map_ids = NULL,
6249 .maps = NULL
6250 };
6251 for (GSC_GENOLEN_T i = 0; i < d->genome.n_markers; ++i) {
6252 d->genome.marker_names[i] = mapcontents[i].name;
6253 d->genome.names_alphabetical[i] = &(d->genome.marker_names[i]);
6254 }
6255 qsort(d->genome.names_alphabetical,d->genome.n_markers,sizeof(*d->genome.names_alphabetical),gsc_helper_indirect_alphabetical_str_comparer);
6256
6257 // Want to raise a warning if any marker names are repeated. One quick scan through.
6258 int n_dups = 0;
6259 for (GSC_GENOLEN_T i = 1; i < d->genome.n_markers; ++i) {
6260 if (strcmp(*d->genome.names_alphabetical[i-1],*d->genome.names_alphabetical[i]) == 0) { ++n_dups; }
6261 }
6262 if (n_dups > 0) {
6263 fprintf(stderr,"%d marker names were duplicates. It is recommended to remove duplicate names from the map file "
6264 "because data will only be loaded into one of the duplicates.\n", n_dups);
6265 }
6266
6267 freeMapNames = 0;
6268 //printf( "Warning: loading genetic map before loading any founder genotypes. Many simulation operations will not yet run.\n");
6269 }
6270
6271 gsc_MapID map = gsc_create_recombmap_from_markerlist(d, nrows, mapcontents);
6272 if (freeMapNames) {
6273 for (size_t i = 0; i < nrows; ++i) {
6274 GSC_FREE(mapcontents[i].name);
6275 }
6276 }
6277 for (size_t i = 0; i < nrows; ++i) { // free the chr names that haven't been moved to the proper map object
6278 if (mapcontents[i].chr != NULL) { GSC_FREE(mapcontents[i].chr); }
6279 }
6280 GSC_FREE(mapcontents);
6281
6282 return map;
6283}
6284
6285
/* NOTE(review): the signature line of this function is not part of this
 * listing; the body reads `d` and `filename`. Several other statements are
 * also absent (the set-up of the table reader `tf` and of the `raweffects`
 * buffer, the allocation of e.cumn_alleles, closing the reader, and the final
 * return), so the notes below describe only what the visible lines do.
 *
 * Reads a table of marker effects with columns "marker", "allele", "eff" and
 * an optional "centre" column, keeps the rows that parse cleanly, sorts them
 * by marker index, and fills a gsc_MarkerEffects value from them.
 * Returns GSC_NO_EFFECTSET when filename is NULL, when the genome tracks no
 * markers, or when no row could be parsed; returns NO_EFFECTSET when the
 * required columns cannot be identified.
 */
6324 if (filename == NULL) return GSC_NO_EFFECTSET;
6325 if (d->genome.n_markers == 0) return GSC_NO_EFFECTSET;
6326
6328
// row/col are 1-based positions of the cell currently being processed.
6329 size_t row = 1;
6330 size_t col = 1;
6331
// Cells read while inspecting the first row are kept in a small queue so the
// main loop below can re-process them if the row turns out to be data.
6332 gsc_TableFileCell cellsread[5] = { 0 };
6333 gsc_TableFileCell* cellqueue = cellsread;
6334 size_t queuesize = 0;
6335 int row1len = gsc_helper_read_first_row(&tf, 3, 4, cellqueue, &queuesize);
6336
6337 const char* titles[4] = { "marker", "allele", "eff", "centre"};
6338 int colnums[4];
6339 int marker_colnum, allele_colnum, eff_colnum, centre_colnum;
// First three titles are passed as one group and "centre" as a second group.
// NOTE(review): presumably required vs optional titles -- confirm against
// gsc_helper_parse_ncell_header.
6340 GSC_LOGICVAL header = gsc_helper_parse_ncell_header(row1len, cellqueue, 3, titles, 1, titles+3, colnums);
6341 if (header == GSC_TRUE) {
6342 printf("(Loading %s) Format: effect file with header\n", filename);
// +1 converts the positions in colnums to the 1-based `col` counter used below.
6343 marker_colnum = colnums[0] + 1, allele_colnum = colnums[1] + 1, eff_colnum = colnums[2] + 1;
6344 centre_colnum = colnums[3] + 1; // might be 0 if the column does not exist.
6345 } else if (header == GSC_FALSE) {
6346 printf("(Loading %s) Format: effect file without header\n", filename);
// Without a header the columns are taken in the fixed order marker, allele, eff[, centre].
6347 marker_colnum = 1, allele_colnum = 2, eff_colnum = 3;
6348 // 3 + 1: number of official titles plus the one from the second line
6349 centre_colnum = (row1len > 3) ? 4 : 0;
6350 } else {
6351 printf("(Loading %s) Failure: Cannot identify the 3 required columns of the effect file\n", filename);
6353 return NO_EFFECTSET;
6354 }
6355
6356 if (centre_colnum > 0) {
6357 printf("(Loading %s) The file has %d columns. Identified optional column \"centre\"\n", filename, row1len);
6358 }
6359
// A header row carries no data: drop its cells from the queue and release any
// cell text that is owned (not a shallow copy).
6360 if (header) {
6361 cellqueue += row1len;
6362 queuesize -= row1len;
6363 for (int i = 0; i < row1len; ++i) {
6364 if (!cellsread[i].isCellShallow) { GSC_FREE(cellsread[i].cell); }
6365 }
6366 }
6367 _Bool goodrow = (header) ? 0 : 1; // discard first row if it's a header, keep if it's not.
6368
// n_effects counts accepted rows; raweffects[n_effects] is the slot the
// current row is being parsed into.
6370 GSC_GENOLEN_T n_effects = 0;
6371
6372 char* conversionflag;
6373 gsc_TableFileCell ncell;
6374
6375 do {
6376 ncell = gsc_helper_tablefilereader_get_next_cell_wqueue(&tf, &cellqueue, &queuesize);
6377
6378 if (ncell.cell != NULL) { // so that we can cope with missing final newline
6379 if (ncell.predNewline) {
6380 // save predecessor row then update row/col position
// The previous row is committed only if every cell parsed and it had at least
// as many columns as the first row.
6381 if (goodrow && col >= row1len) {
6382 ++n_effects;
6383 if (n_effects >= raweffectscap) {
6384 GSC_STRETCH_BUFFER(raweffects,2*n_effects);
6385 }
6386 }
6387
6388 row += ncell.predNewline;
6389 goodrow = 1;
6390 col = 1;
6391 }
6392 col += (ncell.predCol > 0) ? 1 : 0; // multiple column spacers treated as one
6393
// An empty cell invalidates the row; otherwise dispatch on which column this is.
6394 if (ncell.cell_len == 0) {
6395 goodrow = 0;
6396 } else if (col == marker_colnum) {
// Temporarily NUL-terminate the cell in place so it can be used as a C string,
// then restore the overwritten character.
6397 char tmp = ncell.cell[ncell.cell_len]; ncell.cell[ncell.cell_len] = '\0';
6398 _Bool validmarker = gsc_get_index_of_genetic_marker(ncell.cell,d->genome,
6399 &(raweffects[n_effects].markerix));
6400 ncell.cell[ncell.cell_len] = tmp;
6401 if (!validmarker) {
6402 goodrow = 0;
6403 //fprintf(stderr,"Entry at row %i column %i of file %s does not match the name of a tracked marker\n", row, marker_colnum, filename);
6404 }
6405
6406 } else if (col == allele_colnum) {
// Alleles are single characters; longer cells invalidate the row.
6407 if (ncell.cell_len > 1) {
6408 goodrow = 0;
6409 //fprintf(stderr,"Entry at row %i column %i of file %s was too long to represent a single allele\n", row, allele_colnum, filename);
6410 } else {
6411 raweffects[n_effects].allele = ncell.cell[0];
6412 }
6413
6414 } else if (col == eff_colnum) {
6415 char tmp = ncell.cell[ncell.cell_len]; ncell.cell[ncell.cell_len] = '\0';
6416 raweffects[n_effects].eff = strtod(ncell.cell,&conversionflag);
6417 ncell.cell[ncell.cell_len] = tmp;
// The whole cell must have been consumed by strtod for the value to count.
6418 if (conversionflag != ncell.cell + ncell.cell_len) { // unsuccessful read
6419 goodrow = 0;
6420 //fprintf(stderr,"Entry at row %i column %i of file %s could not be parsed as a numeric value\n", row, eff_colnum, filename);
6421 }
6422
6423 } else if (col == centre_colnum) {
6424 char tmp = ncell.cell[ncell.cell_len]; ncell.cell[ncell.cell_len] = '\0';
6425 raweffects[n_effects].centre = strtod(ncell.cell,&conversionflag);
6426 ncell.cell[ncell.cell_len] = tmp;
6427 if (conversionflag != ncell.cell + ncell.cell_len) { // unsuccessful read
6428 goodrow = 0;
6429 //fprintf(stderr,"Entry at row %i column %i of file %s could not be parsed as a numeric value\n", row, eff_colnum, filename);
6430 }
6431
6432 } else {
// A cell in a column that is none of the recognised ones invalidates the row.
6433 goodrow = 0;
6434 }
6435
6436 // Reset
6437 if (!ncell.isCellShallow) { GSC_FREE(ncell.cell); }
6438 }
6439 } while (!ncell.eof);
6440
6441 if (goodrow && col >= row1len) { // the final row, potentially.
6442 ++n_effects;
6443 }
6444
// Failed lines = rows seen, minus the header row (if any), minus accepted rows.
6445 printf("(Loading %s) %lu effect value(s) were loaded. Failed to parse %lu line(s).\n",
6446 filename, (long unsigned int) n_effects, (long unsigned int) (row - header - n_effects));
6448
6449 if (n_effects == 0) {
6450 GSC_DELETE_BUFFER(raweffects);
6451 return GSC_NO_EFFECTSET;
6452 }
6453
6454 // now sort the raweffects based on markerix
6455 qsort(raweffects,n_effects,sizeof(*raweffects),gsc_helper_effectfileunit_ascending_mix_comparer);
6456
6457 // Create empty marker effects
// allele/eff hold one entry per accepted row; centre (when present) holds one
// entry per genome marker and starts at zero.
6458 gsc_MarkerEffects e = { 0 };
6459 e.n_markers = d->genome.n_markers;
6461 e.allele = gsc_malloc_wrap(sizeof(*e.allele)*n_effects, GSC_TRUE);
6462 e.eff = gsc_malloc_wrap(sizeof(*e.eff)*n_effects, GSC_TRUE);
6463 if (centre_colnum) {
6464 e.centre = gsc_malloc_wrap(sizeof(*e.centre)*e.n_markers, GSC_TRUE);
6465 for (GSC_GENOLEN_T i = 0; i < e.n_markers; ++i) {
6466 e.centre[i] = 0;
6467 }
6468 } else {
6469 e.centre = NULL;
6470 }
6471
6472 // Populate MarkerEffects
// Rows are sorted by marker index. Whenever the marker index advances at row
// i, every marker from the previous one up to (not including) the new one gets
// cumn_alleles = i, i.e. the number of rows belonging to markers up to and
// including it. Markers from the last one seen onward get n_effects.
6473 if (centre_colnum) { // conditional moved outside of inner loop
6474 GSC_GENOLEN_T markerix_current = 0;
6475 for (GSC_GENOLEN_T i = 0; i < n_effects; ++i) {
6476 if (raweffects[i].markerix != markerix_current) {
6477 for (GSC_GENOLEN_T j = markerix_current; j < raweffects[i].markerix; ++j) {
6478 e.cumn_alleles[j] = i;
6479 }
6480 markerix_current = raweffects[i].markerix;
6481 }
6482
6483 e.allele[i] = raweffects[i].allele;
6484 e.eff[i] = raweffects[i].eff;
// Per-marker centre accumulates centre*eff over all of the marker's rows.
6485 e.centre[markerix_current] += raweffects[i].centre * raweffects[i].eff; // line that differs
6486 }
6487 for (GSC_GENOLEN_T j = markerix_current; j < e.n_markers; ++j) {
6488 e.cumn_alleles[j] = n_effects;
6489 }
6490 } else {
// Same as the branch above, minus the centre accumulation.
6491 GSC_GENOLEN_T markerix_current = 0;
6492 for (GSC_GENOLEN_T i = 0; i < n_effects; ++i) {
6493 if (raweffects[i].markerix != markerix_current) {
6494 for (GSC_GENOLEN_T j = markerix_current; j < raweffects[i].markerix; ++j) {
6495 e.cumn_alleles[j] = i;
6496 }
6497 markerix_current = raweffects[i].markerix;
6498 }
6499
6500 e.allele[i] = raweffects[i].allele;
6501 e.eff[i] = raweffects[i].eff;
6502 }
6503 for (GSC_GENOLEN_T j = markerix_current; j < e.n_markers; ++j) {
6504 e.cumn_alleles[j] = n_effects;
6505 }
6506 }
6507
6508 GSC_DELETE_BUFFER(raweffects);
6510}
6511
/* NOTE(review): the signature line and every result statement of this
 * function are missing from this listing; only the classification skeleton
 * is visible. It inspects a table cell `c` (its text `c.cell` and length
 * `c.cell_len`) and groups it by length and content. Presumably each group
 * yields one of the gsc_GenotypeFileCellStyle values -- confirm against the
 * original source before relying on these notes.
 */
6519 switch (c.cell_len) {
6520 case 1:
// One-character cells: digits 0-2, or one of the listed single-letter codes.
6521 switch (c.cell[0]) {
6522 case '0':
6523 case '1':
6524 case '2':
6526 case 'G': // G
6527 case 'A': // A
6528 case 'T': // T
6529 case 'C': // C
6530 case 'R': // G/A
6531 case 'Y': // T/C
6532 case 'M': // A/C
6533 case 'K': // G/T
6534 case 'S': // G/C
6535 case 'W': // A/T
6536 case 'N': // any
6538 default:
6539 break;
6540 }
6541 break;
6542 case 2:
// Two-character cells: 'm' followed by a digit is singled out first.
6543 if (c.cell[0] == 'm') { // m[numeric] case, which is probably a marker not an allele pair
6544 switch (c.cell[1]) {
6545 case '0':
6546 case '1':
6547 case '2':
6548 case '3':
6549 case '4':
6550 case '5':
6551 case '6':
6552 case '7':
6553 case '8':
6554 case '9':
6556 default:
6557 break;
6558 }
6559 }
6561 case 3:
// Three-character cells are only of interest when the middle character is '/'.
6562 if (c.cell[1] == '/') {
6564 }
6565 break;
6566 default:
6567 break;
6568 }
6570}
6571
/* NOTE(review): the first signature line (function name and the `loc`
 * parameter) and the outer `case` labels of the switch on `style` are missing
 * from this listing, so which gsc_GenotypeFileCellStyle value selects each
 * group below cannot be read off here.
 *
 * Writes the two alleles encoded by `cell` into the genotype at `loc`, at the
 * pair of slots belonging to marker `markerix`. Where the encoding does not
 * carry phase, the order of the two alleles is chosen at random using the
 * generator in `forrng`.
 */
6578 GSC_GENOLEN_T markerix,
6579 enum gsc_GenotypeFileCellStyle style,
6580 char* cell,
6581 gsc_SimData* forrng) {
// Each marker occupies two consecutive chars in a genotype's allele string.
6582 char* pos = loc.localAM->alleles[loc.localPos] + 2*markerix;
6583 int phase = 0;
6584 switch (style) {
// Two-character cell: both characters are copied as the two alleles.
6586 pos[0] = cell[0];
6587 pos[1] = cell[1];
6588 break;
// Three-character cell: characters 0 and 2 are the alleles (character 1 is skipped).
6590 pos[0] = cell[0];
6591 pos[1] = cell[2];
6592 break;
// Count cell: '0' -> "TT", '2' -> "AA", '1' -> one 'A' and one 'T' in random order.
6594 switch (cell[0]) {
6595 case '0':
6596 pos[0] = 'T';
6597 pos[1] = 'T';
6598 break;
6599 case '1':
6600 phase = rnd_pcg_range(&forrng->rng,0,1);
6601 pos[phase] = 'A';
6602 pos[1-phase] = 'T';
6603 break;
6604 case '2':
6605 pos[0] = 'A';
6606 pos[1] = 'A';
6607 break;
6608 }
6609 break;
// Single-letter code: G/A/T/C are homozygous; R/Y/M/K/S/W are heterozygous
// pairs placed in random order. Any other letter leaves the slots untouched.
6611 switch (cell[0]) {
6612 case 'G': // G
6613 pos[0] = 'G';
6614 pos[1] = 'G';
6615 break;
6616 case 'A': // A
6617 pos[0] = 'A';
6618 pos[1] = 'A';
6619 break;
6620 case 'T': // T
6621 pos[0] = 'T';
6622 pos[1] = 'T';
6623 break;
6624 case 'C': // C
6625 pos[0] = 'C';
6626 pos[1] = 'C';
6627 break;
6628 case 'R': // G/A
6629 phase = rnd_pcg_range(&forrng->rng,0,1);
6630 pos[phase] = 'G';
6631 pos[1-phase] = 'A';
6632 break;
6633 case 'Y': // T/C
6634 phase = rnd_pcg_range(&forrng->rng,0,1);
6635 pos[phase] = 'T';
6636 pos[1-phase] = 'C';
6637 break;
6638 case 'M': // A/C
6639 phase = rnd_pcg_range(&forrng->rng,0,1);
6640 pos[phase] = 'A';
6641 pos[1-phase] = 'C';
6642 break;
6643 case 'K': // G/T
6644 phase = rnd_pcg_range(&forrng->rng,0,1);
6645 pos[phase] = 'G';
6646 pos[1-phase] = 'T';
6647 break;
6648 case 'S': // G/C
6649 phase = rnd_pcg_range(&forrng->rng,0,1);
6650 pos[phase] = 'G';
6651 pos[1-phase] = 'C';
6652 break;
6653 case 'W': // A/T
6654 phase = rnd_pcg_range(&forrng->rng,0,1);
6655 pos[phase] = 'A';
6656 pos[1-phase] = 'T';
6657 break;
6658 default:
6659 break;
6660 }
6661 break;
6662 default: break;
6663 }
6664}
6665
/* NOTE(review): the first signature line (function name and the `d`
 * parameter) and the line that begins the assignment of me.firstAM are
 * missing from this listing; the function name is presumed.
 *
 * Builds a gsc_EmptyListNavigator for simulation `d`: position 0, new
 * genotypes tagged with `allocation_group`, and the id counter seeded from
 * d->current_id. The navigator's current matrix starts at its first matrix.
 */
6670 gsc_GroupNum allocation_group) {
6671 struct gsc_EmptyListNavigator me = { .d=d,
6672 .localPos = 0,
6673 .alloctogroup = allocation_group,
6674 .currentid = d->current_id };
// Trailing arguments of the (not visible) call that creates me.firstAM; the
// final 0 is presumably the initial number of genotypes -- confirm.
6676 me.d->n_labels,
6677 me.d->label_defaults, 0);
6678 me.localAM = me.firstAM;
6679 return me;
6680}
6681
/* NOTE(review): the signature line is missing from this listing; the body
 * operates on a navigator pointer `it`, and the genotype-matrix loader in this
 * file calls gsc_emptylistnavigator_get_first(&it).
 *
 * Rewinds the navigator to slot 0 of its first matrix and returns that
 * location. If the first matrix holds no genotypes yet, slot 0 is created:
 * zero-filled alleles, no name, the navigator's group, and the next id.
 */
6687 it->localAM = it->firstAM;
6688 it->localPos = 0;
6689 if (1 > it->localAM->n_genotypes) {
6690 it->localAM->n_genotypes = 1;
// Two allele chars per marker, all initialised to 0.
6691 it->localAM->alleles[0] = gsc_malloc_wrap(sizeof(char) * (it->localAM->n_markers<<1),GSC_TRUE);
6692 memset(it->localAM->alleles[0], 0, sizeof(char) * (it->localAM->n_markers<<1));
6693 it->localAM->names[0] = NULL;
6694 it->localAM->groups[0] = it->alloctogroup;
// Ids are handed out by pre-incrementing the navigator's private counter.
6695 ++(it->currentid.id);
6696 it->localAM->ids[0] = it->currentid;
6697 }
6698 return (gsc_GenoLocation){.localAM=it->localAM, .localPos =it->localPos};
6699}
6700
/* NOTE(review): the signature line and the line that begins the declaration
 * of `next` are missing from this listing; the body operates on a navigator
 * pointer `it`, and the genotype-matrix loader in this file calls
 * gsc_emptylistnavigator_get_next(&it).
 *
 * Advances the navigator by one slot and returns the new location. Moving
 * past the last slot of a matrix steps into the following matrix, creating
 * and linking a new one if there is none. If the new slot lies just beyond
 * the populated part of its matrix, an empty genotype is created there.
 * Returns INVALID_GENO_LOCATION if the slot is more than one past the
 * populated part.
 */
6706 if (CONTIG_WIDTH - 1 == it->localPos) {
6707 if (NULL == it->localAM->next) {
// Trailing arguments of the (not visible) call that creates `next`.
6709 it->d->n_labels,
6710 it->d->label_defaults, 0);
6711 it->localAM->next = next;
6712 it->localAM = next;
6713 it->localPos = 0;
6714 } else {
6715 it->localAM = it->localAM->next;
6716 it->localPos = 0;
6717 }
6718 } else {
6719 ++(it->localPos);
6720 }
6721
6722 if (it->localAM->n_genotypes <= it->localPos) {
// Slots must be filled contiguously: only the slot immediately after the last
// populated one may be created here.
6723 if (1 < it->localPos - it->localAM->n_genotypes) {
6724 fprintf(stderr,"EmptyListNavigator invalid\n");
6725 return INVALID_GENO_LOCATION;
6726 }
6727 ++(it->localAM->n_genotypes);
6728
// New genotype: zero-filled alleles, no name, navigator's group, next id.
6729 it->localAM->alleles[it->localPos] = gsc_malloc_wrap(sizeof(char) * (it->localAM->n_markers<<1),GSC_TRUE);
6730 memset(it->localAM->alleles[it->localPos], 0, sizeof(char) * (it->localAM->n_markers<<1));
6731 it->localAM->names[it->localPos] = NULL;
6732 it->localAM->groups[it->localPos] = it->alloctogroup;
6733 ++(it->currentid.id);
6734 it->localAM->ids[it->localPos] = it->currentid;
6735 }
6736
6737 return (gsc_GenoLocation){.localAM=it->localAM, .localPos =it->localPos};
6738}
6739
/* NOTE(review): the signature line and one statement in the else-branch are
 * missing from this listing; the function name is presumed. The body operates
 * on a navigator pointer `it`.
 *
 * Hands the navigator's chain of matrices over to the simulation: it becomes
 * the simulation's list if that list is empty, otherwise it is linked after
 * the last existing matrix. The navigator's id counter is then written back
 * to the simulation so ids allocated through the navigator are not reused.
 */
6745 if (NULL == it->d->m) {
6746 it->d->m = it->firstAM;
6747 } else {
// Walk to the tail of the simulation's matrix list.
6748 gsc_AlleleMatrix* listend = it->d->m;
6749 while (NULL != listend->next) {
6750 listend = listend->next;
6751 }
6752 listend->next = it->firstAM;
6754 }
6755 it->d->current_id = it->currentid;
6756}
6757
/* NOTE(review): the first signature line (return type and function name) is
 * missing from this listing; the genotype-matrix loader in this file calls
 * this as gsc_helper_genotypefile_matrix_detect_orientation.
 *
 * Decides whether genetic markers run along rows or columns of a genotype
 * matrix, returning `format` with markers_as_rows (and sometimes has_header)
 * filled in.
 *
 *  d              simulation whose known marker names are matched against.
 *  cellqueue      cells read so far from the start of the file.
 *  firstrowlen    number of cells in the first row.
 *  queuelen       number of cells in cellqueue.
 *  format         format known so far; returned unchanged if markers_as_rows
 *                 is already GSC_TRUE or GSC_FALSE.
 *  filenameforlog file name used in log messages only.
 *
 * Markers are assumed to be rows unless a cell of the first row (other than
 * the first cell) matches a known marker name.
 */
6785 const SimData* d,
6786 const gsc_TableFileCell* cellqueue,
6787 const size_t firstrowlen,
6788 const size_t queuelen,
6789 struct gsc_GenotypeFile_MatrixFormat format,
6790 const char* filenameforlog) {
6791
6792 if (format.markers_as_rows == GSC_TRUE || format.markers_as_rows == GSC_FALSE) {
6793 // pass
6794 } else if (d->genome.n_maps == 0) {
6795 // If there is no genetic map, we cannot check the row/column headers to see if any of them match the marker names..
6796 // Default to markers being rows
6797
6798 printf("(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns| (by assumption when no genetic map is loaded)\n", filenameforlog);
6799 printf("(Loading %s) No genetic map is loaded, will invent a map where all markers are unlinked/show independent assortment\n", filenameforlog);
6800 format.markers_as_rows = GSC_TRUE;
6801
6802 } else if (format.has_header == GSC_FALSE) {
6803 printf("(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns| "
6804 "(by assumption when matrix has no header row)\n", filenameforlog);
6805 format.markers_as_rows = GSC_TRUE;
6806
6807 } else {
6808 // Note: by here, either the user has told us there is a header row, or we get to detect whether there is one. So will investigate it by comparing names to what's in our map
6809 // taken from older function gsc_helper_genotypefile_matrix_check_markers_are_rows
// Index of the first cell in row one that is certainly a column header
// (-1 when there is no such cell).
6810 int firstsafeheaderindex = -1;
6811 if (firstrowlen > 1) {
6812 firstsafeheaderindex = 1;
6813 } else if (firstrowlen == 1 && queuelen > firstrowlen + 1) { // second row has more than one cell read.
6814 firstsafeheaderindex = 0; // assume there's no corner cell
6815 format.has_header = GSC_TRUE;
6816 }
6817
6818 if (firstsafeheaderindex >= 0) {
6819 // Don't check the "first" cell. It might be a corner cell between the two headers, whose value should be ignored
6820 // Check the next cell in the first row.
// NOTE(review): the cell text is passed directly as a C string; presumably the
// queued cells are NUL-terminated deep copies -- confirm against caller.
6821 if (gsc_get_index_of_genetic_marker(cellqueue[firstsafeheaderindex].cell, d->genome, NULL)) {
6822 printf("(Loading %s) Format axis: genetic markers are |columns|, founder lines are -rows-\n", filenameforlog);
6823 format.markers_as_rows = GSC_FALSE;
6824 format.has_header = GSC_TRUE;
6825 return format;
6826 }
6827
6828 // If that wasn't a match, check the first row header, if it exists:
// cellqueue[firstrowlen] is the first cell of the second row.
6829 if (queuelen > firstrowlen && !cellqueue[firstrowlen].eof &&
6830 gsc_get_index_of_genetic_marker(cellqueue[firstrowlen].cell, d->genome, NULL)) {
6831 printf("(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns|\n", filenameforlog);
6832 format.markers_as_rows = GSC_TRUE;
6833 return format;
6834 }
6835
6836 // Check remaining column headers
6837 for (size_t i = firstsafeheaderindex + 1; i < firstrowlen; ++i) {
6838 if (gsc_get_index_of_genetic_marker(cellqueue[i].cell, d->genome, NULL)) {
6839 printf("(Loading %s) Format axis: genetic markers are |columns|, founder lines are -rows-\n", filenameforlog);
6840 format.markers_as_rows = GSC_FALSE;
6841 format.has_header = GSC_TRUE;
6842 return format;
6843 }
6844 }
6845
6846 }
// Nothing matched a known marker name: fall back to markers-as-rows.
6847 printf("(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns| (by default file format)\n", filenameforlog);
6848 format.markers_as_rows = GSC_TRUE;
6849 }
6850 return format;
6851
6852}
6853
/* NOTE(review): the first signature line (return type and function name) is
 * missing from this listing, as are the statement inside the single-row
 * detection branch and the `case`/`default` label that introduces the final
 * warning/failure branch of the logging switch. The genotype-matrix loader in
 * this file calls this as gsc_helper_genotypefile_matrix_detect_cellstyle.
 *
 * If format.cell_style is GSC_GENOTYPECELLSTYLE_UNKNOWN, tries to work out
 * the style from a body cell and logs the outcome; otherwise returns `format`
 * untouched.
 *
 *  cellqueue      cells read so far from the start of the file.
 *  firstrowlen    number of cells in the first row.
 *  queuelen       number of cells in cellqueue.
 *  format         format known so far.
 *  filenameforlog file name used in log messages only.
 */
6866 const gsc_TableFileCell* cellqueue,
6867 const size_t firstrowlen,
6868 const size_t queuelen,
6869 struct gsc_GenotypeFile_MatrixFormat format,
6870 const char* filenameforlog) {
6871
6872 _Bool style_detected = 0;
6873 _Bool single_col_file = 0;
6874 // 1. Detect format if not yet provided.
6875 if (format.cell_style == GSC_GENOTYPECELLSTYLE_UNKNOWN) {
6876 style_detected = 1;
6877
// The first test guards the second: cellqueue[firstrowlen] is only read when
// the queue holds more cells than the first row.
6878 if (firstrowlen == queuelen || cellqueue[firstrowlen].eof) { // There is only one row. Short-circuiting necessary
6879 // if there is also only one column, we have no body cells to detect the style of
6880 if (firstrowlen > 1) {
6881 // Detection path for a single-line file. If it has a header, then this value might end up ignored
6883 } else {
6884 single_col_file = 1; // one-cell file. needs the warning.
6885 }
6886 } else { // there is more than one row
6887 // If there is only one column, there are no body cells with style to detect
// cellqueue[firstrowlen+1] is the second cell read after the first row; it is
// a body cell only if no newline precedes it.
6888 if (firstrowlen + 1 < queuelen && cellqueue[firstrowlen+1].predNewline < 1) {
6889 // Detection path. There exists a second cell on the second line that we can read
6890 format.cell_style = gsc_helper_genotype_matrix_identify_cell_style(cellqueue[firstrowlen+1]);
6891 } else {
6892 single_col_file = 1;
6893 }
6894 }
6895 }
6896
6897 // 2. Print cell style detection logs
6898 if (style_detected) {
6899 switch(format.cell_style) {
6900 case GSC_GENOTYPECELLSTYLE_PAIR: printf("(Loading %s) Allele format: phased allele pairs\n", filenameforlog); break;
6901 case GSC_GENOTYPECELLSTYLE_SLASHPAIR: printf("(Loading %s) Allele format: phased allele pairs (slash-separated)\n", filenameforlog); break;
6902 case GSC_GENOTYPECELLSTYLE_COUNT: printf("(Loading %s) Allele format: reference allele counts (phase will be randomised)\n", filenameforlog); break;
6903 case GSC_GENOTYPECELLSTYLE_ENCODED: printf("(Loading %s) Allele format: IUPAC encoded pair (phase will be randomised)\n", filenameforlog); break;
// Style still unknown: a file with no body cells only earns a warning, while
// anything else is reported as a failure to recognise the encoding.
6905 if (single_col_file || firstrowlen == queuelen ||
6906 (firstrowlen + 1 == queuelen && cellqueue[firstrowlen].eof && cellqueue[firstrowlen].cell_len == 0)) {
6907 printf("(Loading %s) Warning: empty genotype matrix. No genotypes will be loaded.\n", filenameforlog);
6908 } else {
6909 fprintf(stderr,"(Loading %s) Failure: Unable to determine the formatting of pairs of alleles."
6910 " Check genomicSimulation manual for accepted allele pair encodings\n", filenameforlog);
6911 }
6912 }
6913 }
6914
6915 return format;
6916}
6917
/* NOTE(review): the first signature line (return type and function name) is
 * missing from this listing; the genotype-matrix loader in this file calls
 * this as gsc_helper_genotypefile_matrix_detect_header.
 *
 * Determines whether the first row of a genotype matrix is a header row and
 * returns `format` with has_header filled in where it could be decided.
 *
 *  cellqueue      cells read so far from the start of the file.
 *  firstrowlen    number of cells in the first row.
 *  queuelen       number of cells in cellqueue.
 *  format         format known so far; has_header is only detected when it is
 *                 neither GSC_TRUE nor GSC_FALSE on entry.
 *  filenameforlog file name used in log messages only.
 *
 * If the caller has stated "no header" together with "markers as columns",
 * has_header is set to GSC_NA and a failure message is printed.
 */
6946 const gsc_TableFileCell* cellqueue,
6947 const size_t firstrowlen,
6948 const size_t queuelen,
6949 struct gsc_GenotypeFile_MatrixFormat format,
6950 const char* filenameforlog) {
6951 // Validity check: if genetic markers are columns, header row is mandatory
6952 if (format.has_header == GSC_FALSE && format.markers_as_rows == GSC_FALSE) {
6953 printf("(Loading %s) Failure: genetic markers cannot be represented by columns when matrix has no header row\n", filenameforlog);
6954 format.has_header = GSC_NA;
6955 return format;
6956 }
6957
6958 // Detect header if we need to detect it.
6959 if (format.has_header != GSC_FALSE && format.has_header != GSC_TRUE) {
6960 if (firstrowlen == 1) {
6961 // we could have a single-column file (no header assumed), or
6962 // we could be a two-column file with no corner cell (must have a header)
6963 if (queuelen > 2) {
// cellqueue[1] starts the second row; if cellqueue[2] is on that same row the
// second row is longer than the first, so the first row must be a header.
6964 if (cellqueue[2].eof || cellqueue[2].predNewline) {
6965 format.has_header = GSC_FALSE; // single column file
6966 } else {
6967 format.has_header = GSC_TRUE;
6968 }
6969 } // else can't draw any conclusions.
6970
6971 } else if (format.cell_style != GSC_GENOTYPECELLSTYLE_UNKNOWN) {
6972 // Idea: if we find a cell in the first row that doesn't match the expected cell style, then that first row is probably a header
6973 format.has_header = GSC_FALSE;
6974 for (size_t i = 1; i < firstrowlen; ++i) { // ignore first cell in row, it could be a corner cell or row header
6975 if (gsc_helper_genotype_matrix_identify_cell_style(cellqueue[i]) != format.cell_style) {
6976 format.has_header = GSC_TRUE;
6977 break;
6978 }
6979 }
6980 } // else don't know how to detect.
6981
// Report the outcome; anything other than TRUE/FALSE means detection failed.
6982 switch (format.has_header) {
6983 case GSC_FALSE: printf("(Loading %s) Format: genotype matrix without header row\n", filenameforlog); break;
6984 case GSC_TRUE: printf("(Loading %s) Format: genotype matrix with header row\n", filenameforlog); break;
6985 default: fprintf(stderr,"(Loading %s) Failure: Unable to determine whether file has header row\n", filenameforlog); break;
6986 }
6987 }
6988
6989 return format;
6990}
6991
/* NOTE(review): the first signature line (return type and function name) is
 * missing from this listing; the genotype-matrix loader in this file calls
 * this as gsc_helper_genotypefile_matrix_detect_cornercell_presence.
 *
 * Infers from row lengths whether the header row begins with a corner cell
 * (a cell above the row-header column).
 *
 *  ncellsfirstrow         number of cells in the header row.
 *  ncellssecondrow        number of cells in the second row.
 *  secondrowheaderisempty true when the second row's first (row-header) cell
 *                         is empty and therefore was not counted.
 *
 * Returns GSC_TRUE if a corner cell is present, GSC_FALSE if not, and GSC_NA
 * when the two row lengths cannot be reconciled.
 */
7008 const size_t ncellsfirstrow,
7009 const size_t ncellssecondrow,
7010 const _Bool secondrowheaderisempty) {
// Second row is one longer: its extra cell is the row header, so the header
// row has nothing above that column.
7011 if (ncellssecondrow == ncellsfirstrow + 1) {
7012 return GSC_FALSE;
7013 } else if (ncellssecondrow == ncellsfirstrow) {
7014 if (secondrowheaderisempty) {
7015 return GSC_FALSE; //genotype name is simply empty, making the second row look one column shorter than reality
7016 } else {
7017 return GSC_TRUE;
7018 }
7019 } else if (ncellssecondrow == ncellsfirstrow - 1 && secondrowheaderisempty) {
7020 return GSC_TRUE; // genotype name on row 2 is empty but corner cell is not
7021 } else {
7022 return GSC_NA;
7023 }
7024}
7025
7055 .spec={(struct gsc_GenotypeFile_MatrixFormat){.cell_style=cell_style,
7056 .has_header=has_header,
7057 .markers_as_rows=markers_as_rows}}};
7058}
7059
7076 const char* filename,
7077 const gsc_FileFormatSpec format) {
7078 if (filename == NULL) return NO_GROUP;
7080 fprintf(stderr,"Non-genotype-matrix format specification provided to genotype matrix file loader function\n");
7081 return NO_GROUP;
7082 }
7083
7084 // Part 1: Detect file formatting details
7085 struct gsc_GenotypeFile_MatrixFormat format_detected =
7086 { .has_header = GSC_NA, .markers_as_rows = GSC_NA, .cell_style = GSC_GENOTYPECELLSTYLE_UNKNOWN };
7087 if (format.filetype == GSC_GENOTYPEFILE_MATRIX) {
7088 format_detected = format.spec.matrix;
7089 }
7090 size_t queuesize = 0;
7091
7093 if (tbl.fp == NULL) { return NO_GROUP; }
7094 // Read one row + 2 cells (if possible)
7095 GSC_CREATE_BUFFER(cellsread,gsc_TableFileCell,100);
7096 size_t ncellsread = 0;
7097 do {
7098 cellsread[ncellsread] = gsc_tablefilereader_get_next_cell(&tbl);
7099 gsc_tablefilecell_deep_copy(&cellsread[ncellsread]);
7100 ++ncellsread;
7101 if (ncellsread >= cellsreadcap) {
7102 GSC_STRETCH_BUFFER(cellsread,2*ncellsread);
7103 }
7104 } while (!cellsread[ncellsread-1].eof && (ncellsread <= 1 || !cellsread[ncellsread-1].predNewline));
7105 size_t ncellsfirstrow = (cellsread[ncellsread-1].eof && cellsread[ncellsread-1].cell_len > 0) ? ncellsread : ncellsread - 1;
7106 if (!cellsread[ncellsread-1].eof) { // read one more cell if possible
7107 cellsread[ncellsread] = gsc_tablefilereader_get_next_cell(&tbl);
7108 gsc_tablefilecell_deep_copy(&cellsread[ncellsread]);
7109 ++ncellsread;
7110 if (ncellsread >= cellsreadcap) {
7111 GSC_STRETCH_BUFFER(cellsread,2*ncellsread);
7112 }
7113 }
7114 queuesize = ncellsread; // so that we know how many to free if we failure_exit
7115 if (ncellsread <= 1) { // file is an EOF only
7116 goto failure_exit;
7117 }
7118 //int is_onecol_file = cellsread[ncellsfirstrow + 1].predNewline > 0 || ncellsread == 2; // ncellsread == 2 means we read one cell, then an EOF
7119 int is_onerow_file = ncellsread == ncellsfirstrow || cellsread[ncellsfirstrow].eof; // short-circuiting essential!
7120
7121 format_detected = gsc_helper_genotypefile_matrix_detect_orientation(d, cellsread, ncellsfirstrow, ncellsread, format_detected, filename);
7122 format_detected = gsc_helper_genotypefile_matrix_detect_cellstyle(cellsread, ncellsfirstrow, ncellsread, format_detected, filename);
7123 format_detected = gsc_helper_genotypefile_matrix_detect_header(cellsread, ncellsfirstrow, ncellsread, format_detected, filename);
7124 if ((format_detected.has_header != GSC_FALSE && format_detected.has_header != GSC_TRUE) ||
7125 (format_detected.markers_as_rows != GSC_FALSE && format_detected.markers_as_rows != GSC_TRUE) ||
7126 format_detected.cell_style == GSC_GENOTYPECELLSTYLE_UNKNOWN) {
7127 goto failure_exit;
7128 }
7129
7130 GSC_LOGICVAL format_has_corner_cell = GSC_NA;
7131 // If markers as columns, we do need to know how many cells are in the second row in order to detect a corner cell
7132 if (!format_detected.markers_as_rows && !is_onerow_file) {
7133 // Read rest of second row
7134 while (!cellsread[ncellsread-1].eof && !cellsread[ncellsread-1].predNewline) {
7135 cellsread[ncellsread] = gsc_tablefilereader_get_next_cell(&tbl);
7136 gsc_tablefilecell_deep_copy(&cellsread[ncellsread]);
7137 ++ncellsread;
7138 if (ncellsread >= cellsreadcap) {
7139 GSC_STRETCH_BUFFER(cellsread,2*ncellsread);
7140 }
7141 }
7142 // Detect corner cell
7143 queuesize = ncellsread; // so that we know how many to free if we failure_exit
7144 size_t ncellssecondrow = ncellsread - ncellsfirstrow - 1;
7145 format_has_corner_cell = gsc_helper_genotypefile_matrix_detect_cornercell_presence(ncellsfirstrow, ncellssecondrow, cellsread[ncellsfirstrow].predCol > 0);
7146 if (format_has_corner_cell == GSC_NA) {
7147 fprintf(stderr, "(Loading %s) Failure: Header row length and second row length do not align\n", filename);
7148 goto failure_exit;
7149 }
7150 }
7151
7152 // Create the queue of cells to parse (exclude header from this queue, because it needs to be dealt with differently)
7153 gsc_TableFileCell* cellqueue = cellsread;
7154 //queuesize = ncellsread; (already done above)
7155 if (format_detected.has_header) {
7156 cellqueue = cellsread + ncellsfirstrow;
7157 queuesize = ncellsread - ncellsfirstrow;
7158 }
7159
7160 // PART 2: Create uniform-spaced map, if we have no map currently
7161 _Bool build_map_from_rows = 0;
7162 if (d->genome.n_markers == 0) {
7163 if (format_detected.markers_as_rows) {
7164 build_map_from_rows = 1;
7165 // We're going to have to do an independent read of the file to extract these. Will be a bit slower.
7168 GSC_GENOLEN_T nmarkersread = format_detected.has_header ? 0 : 1;
7169 do {
7171 if (cell.predNewline) { ++nmarkersread; }
7172 if (!cell.isCellShallow) { GSC_FREE(cell.cell); }
7173 } while (!cell.eof);
7175 if (cell.predNewline) { // there's a newline before eof, so no real actual last row
7176 --nmarkersread;
7177 }
7178
7179 d->genome.n_markers = nmarkersread;
7182 gsc_create_unlinked_recombmap(d,0,NULL); // create based on the markers we've saved in 'genome'
7183
7184 } else { // markers as columns
7185 if (!format_detected.has_header) { // you should not be able to get here. // assert(format_detected.has_header == GSC_TRUE);
7186 fprintf(stderr, "(Loading %s) Failure: Genotype matrix with markers as columns but no header row is an unsupported file type (there is no way to tell which column is which marker)\n", filename);
7187 goto failure_exit;
7188 }
7189
7190 size_t i = format_has_corner_cell ? 1 : 0; // starting index for iterating through names
7191 d->genome.n_markers = ncellsfirstrow - i;
7194 for (size_t j = 0; j < d->genome.n_markers; ++i, ++j) {
7195 //gsc_tablefilecell_deep_copy(&cellqueue[i]); // already deep copied
7196 d->genome.marker_names[j] = cellsread[i].cell;
7197 cellsread[i].isCellShallow = GSC_TRUE; // prevent deletion
7199 }
7201 gsc_create_unlinked_recombmap(d,0,NULL); // create based on the markers we've saved in 'genome'
7202 }
7203 }
7204
7205 // PART 3: Parse file into an AlleleMatrix
7206
7209 GSC_GENOLEN_T nvalidmarker = 0;
7210 size_t n_cols = 0;
7211 if (format_detected.markers_as_rows) {
7212
7213 gsc_GenoLocation loc;
7214 gsc_TableFileCell ncell;
7215 n_cols = (format_detected.has_header) ? ncellsfirstrow + 1 : ncellsfirstrow; // assume first row has no corner cell for now
7216 _Bool first = 1;
7217 _Bool have_valid_marker = 0; GSC_GENOLEN_T markerix;
7218 GSC_GLOBALX_T column = 0;
7219 size_t row = 0;
7220 do {
7221 ncell = gsc_helper_tablefilereader_get_next_cell_wqueue(&tbl,&cellqueue,&queuesize);
7222
7223 if (ncell.cell != NULL) {
7224 if (ncell.predNewline || first) {
7225
7226
7227 if (build_map_from_rows) {
7228 ++nvalidmarker; have_valid_marker = 1;
7229 if (first) {
7230 markerix = 0;
7231 } else {
7232 markerix++;
7233 }
7235 d->genome.marker_names[markerix] = ncell.cell;
7236 ncell.cell = NULL; // prevent deletion
7237 } else {
7238 char tmp = ncell.cell[ncell.cell_len]; ncell.cell[ncell.cell_len] = '\0';
7239
7240 have_valid_marker = gsc_get_index_of_genetic_marker(ncell.cell, d->genome, &markerix);
7241
7242 nvalidmarker += have_valid_marker;
7243 ncell.cell[ncell.cell_len] = tmp;
7244 }
7245
7246
7247 // Then, after reading first row, detect what our expected row length is, if defaults don't suit.
7248 if (row == 1 && format_detected.has_header) {
7249 if (column + 1 != ncellsfirstrow && column + 1 != ncellsfirstrow + 1) {
7250 fprintf(stderr, "(Loading %s) Failure: Header row length and second row length do not align\n", filename);
7251 goto failure_exit;
7252 } else {
7253 n_cols = column + 1;
7254 }
7255 }
7256 first = 0;
7257 column = 0;
7258 ++row;
7259
7260 } else if (ncell.predCol) { // any number of column spacers treated as one column gap when reading a genotype matrix
7261 ++column;
7262 if (have_valid_marker && column < n_cols) {
7263 loc = (1 == column) ? gsc_emptylistnavigator_get_first(&it) : gsc_emptylistnavigator_get_next(&it);
7264 gsc_helper_genotypecell_to_allelematrix(loc,markerix,format_detected.cell_style,ncell.cell,d);
7265 } // Note we ignore all extra cells in all rows
7266 }
7267 }
7268
7269 if (!ncell.isCellShallow) { GSC_FREE(ncell.cell); }
7270 } while (!ncell.eof);
7271 if (row == 1 && format_detected.has_header) {
7272 if (column + 1 != ncellsfirstrow && column + 1 != ncellsfirstrow + 1) {
7273 fprintf(stderr, "(Loading %s) Failure: Header row length and second row length do not align\n", filename);
7274 goto failure_exit;
7275 } else {
7276 n_cols = column + 1;
7277 }
7278 }
7279
7280 // Then save the genotype names
7281 if (format_detected.has_header) {
7282 format_has_corner_cell = gsc_helper_genotypefile_matrix_detect_cornercell_presence(ncellsfirstrow, n_cols, cellsread[ncellsfirstrow].predCol > 0);
7283 size_t i = format_has_corner_cell ? 1 : 0;
7284 gsc_GenoLocation loc;
7285 for (size_t j = 0; i < ncellsfirstrow; ++i, ++j) {
7287 // assert(!cellsread[i].isShallowCopy);
7288 gsc_set_name(loc,cellsread[i].cell); // using names here so no need to free them. Since they're in cellsread
7289 cellsread[i].isCellShallow = GSC_TRUE; // prevent deletion
7290 }
7291 }
7292
7293 // Then finalise the map, if we're creating one:
7294 if (build_map_from_rows) {
7295 for (GSC_GENOLEN_T i = 0; i < d->genome.n_markers; ++i) {
7297 }
7299 }
7300
7301 } else { // markers as columns
7302 if (!format_detected.has_header) { // you should not be able to get here.
7303 fprintf(stderr, "(Loading %s) Failure: Genotype matrix with markers as columns but no header row is an unsupported file type (there is no way to tell which column is which marker)\n", filename);
7304 goto failure_exit;
7305 }
7306
7307 // Identify the marker corresponding to each column
7308 size_t i = format_has_corner_cell ? 1 : 0;
7309 size_t n_col = ncellsfirstrow + (1-i);
7310 GSC_GENOLEN_T* markerixs = gsc_malloc_wrap(sizeof(*markerixs)*ncellsfirstrow,GSC_TRUE);
7311 for (GSC_GENOLEN_T j = 0; i < ncellsfirstrow; ++i, ++j) {
7312 markerixs[j] = d->genome.n_markers;
7313 nvalidmarker += gsc_get_index_of_genetic_marker(cellsread[i].cell, d->genome, &markerixs[j]);
7314 }
7315
7316 // Read the table
7317 _Bool first = 1;
7318 GSC_GLOBALX_T row = 0;
7319 size_t column = 0; // we count column numbers from 1 for the first body cell. sorry for the inconsistency with the branch of the if statement above.
7321 gsc_TableFileCell ncell;
7322 do {
7323 ncell = gsc_helper_tablefilereader_get_next_cell_wqueue(&tbl,&cellqueue,&queuesize);
7324
7325 if (ncell.cell != NULL) {
7326 if (ncell.predNewline) {
7328 first = 0;
7329
7330 ++row;
7331 column = 0;
7332 if (ncell.predCol) { // missing name.
7333 gsc_set_name(loc,NULL);
7334 } else {
7336 gsc_set_name(loc,ncell.cell);
7337 ncell.isCellShallow = GSC_TRUE; // so it does not get deleted
7338 }
7339 }
7340
7341 if (ncell.predCol) {
7342 ++column;
7343 if (column < n_col && markerixs[column-1] < d->genome.n_markers) {
7344 gsc_helper_genotypecell_to_allelematrix(loc,markerixs[column-1],format_detected.cell_style,ncell.cell,d);
7345 }
7346 }
7347 }
7348
7349 if (!ncell.isCellShallow) { GSC_FREE(ncell.cell); }
7350 } while (!ncell.eof);
7351
7352 GSC_FREE(markerixs);
7353
7354 }
7355
7356 // PART 4: Tidy and clean and exit
7357 GSC_GLOBALX_T ngenos = 0;
7358 AlleleMatrix* tmpam = it.firstAM;
7359 while (tmpam != NULL) {
7360 ngenos += tmpam->n_genotypes;
7361 tmpam = tmpam->next;
7362 }
7363 printf("(Loading %s) %lu genotype(s) of %lu marker(s) were loaded.\n", filename,
7364 (long unsigned int) ngenos, (long unsigned int) nvalidmarker);
7365 if (ngenos == 0) {
7367 goto failure_exit;
7368 }
7370 ++d->n_groups;
7371
7372 // ... cleaning up the header row
7373 if (format_detected.has_header) {
7374 for (size_t j = 0; j < ncellsfirstrow; ++j) {
7375 if (!cellsread[j].isCellShallow) { GSC_FREE(cellsread[j].cell); }
7376 }
7377 }
7378 GSC_DELETE_BUFFER(cellsread);
7380 return group;
7381
7382 failure_exit:
7383 // Clean up structures and return, having loaded no genotypes
7384 // ... cleaning up unprocessed cells in the queue
7385 for (size_t i = 1; i <= queuesize; ++i) {
7386 if (!cellsread[ncellsread-i].isCellShallow) {
7387 GSC_FREE(cellsread[ncellsread-i].cell);
7388 cellsread[ncellsread-i].isCellShallow = GSC_TRUE;
7389 }
7390 }
7391 // ... cleaning up the header row
7392 if (format_detected.has_header) {
7393 for (size_t j = 0; j < ncellsfirstrow; ++j) {
7394 if (!cellsread[j].isCellShallow) { GSC_FREE(cellsread[j].cell); }
7395 }
7396 }
7397 GSC_DELETE_BUFFER(cellsread);
7399 return NO_GROUP;
7400}
7401
7420 const char* filename,
7421 const gsc_FileFormatSpec format) {
     // Thin wrapper: delegates to gsc_load_data_files with no map file and no
     // effect file, and hands back only the `group` field of its result.
7422 return gsc_load_data_files(d,filename,NULL,NULL,format).group;
7423}
7424
7444 const char* genotype_file,
7445 const char* map_file,
7446 const char* effect_file,
7447 const gsc_FileFormatSpec format) {
     // Loads up to three input files and returns the identifiers each loader
     // produced, bundled in a gsc_MultiIDSet (group, map, effSet).
7448 // Parse file suffix for file type, if it was not already provided
7449 enum gsc_GenotypeFileType type = format.filetype;
7450
     // Only the text after the LAST '.' in the genotype file name is inspected,
     // and the comparison is case-sensitive.
7451 if (type == GSC_GENOTYPEFILE_UNKNOWN && genotype_file != NULL) {
7453 char* suffix = strrchr(genotype_file,'.');
7454 if (suffix != NULL) {
7455 if (strcmp(suffix,".bed") == 0) {
7456 type = GSC_GENOTYPEFILE_BED;
7457 } else if (strcmp(suffix,".ped") == 0) {
7458 type = GSC_GENOTYPEFILE_PED;
7459 } else if (strcmp(suffix,".vcf") == 0) {
7460 type = GSC_GENOTYPEFILE_VCF;
7461 }
7462 }
7463 }
7464
     // Start from the "nothing loaded" identifiers; only the default branch of
     // the switch below overwrites them.
7465 struct gsc_MultiIDSet out = { .group=NO_GROUP, .map=NO_MAP, .effSet=NO_EFFECTSET };
7466
7467 switch (type) {
7469 //if (detectedtype) { printf("Will attempt to parse %s as a plink .bed file\n", filename); }
7470 fprintf(stderr,"plink .bed file parsing not yet implemented\n");
7471 break;
7473 fprintf(stderr,"plink .ped file parsing not yet implemented\n");
7474 break;
7476 fprintf(stderr,"vcf file parsing not yet implemented\n");
7477 break;
7478 default:
7479 //printf("(Loading files) Will treat %s as a genotype matrix (see genomicSimulation's default input file types)\n", genotype_file);
     // Order of the calls: map file first, then the genotype matrix, then the
     // effect file. Each loader receives its file name as given (possibly NULL).
7480 out.map = gsc_load_mapfile(d, map_file);
7481 out.group = gsc_load_genotypefile_matrix(d, genotype_file, format);
7482 out.effSet = gsc_load_effectfile(d, effect_file);
7483 }
7484
7485 return out;
7486}
7487
7488/*--------------------------Recombination counts-----------------------------*/
7489
7524int* gsc_calculate_min_recombinations_fw1(gsc_SimData* d, gsc_MapID mapid, char* parent1, unsigned int p1num, char* parent2,
7525 unsigned int p2num, char* offspring, int certain) {
7526 if (d->genome.n_maps < 1) {
7527 fprintf(stderr,"Need at least one recombination map loaded to estimate recombinations\n");
7528 return NULL;
7529 }
7530 int mapix = 0;
7531 if (mapid.id != NO_MAP.id) { mapix = gsc_get_index_of_map(d, mapid); }
7532 if (mapix >= d->genome.n_maps) {
7533 fprintf(stderr,"We don't have that recombination maps loaded\n");
7534 return NULL;
7535 }
7536 gsc_RecombinationMap map = d->genome.maps[mapix];
7537
7538 int* origins = gsc_malloc_wrap(sizeof(int) * d->genome.n_markers,GSC_TRUE);
7539 memset(origins,0,sizeof(*origins)*d->genome.n_markers);
7540 int p1match, p2match;
7541 int previous = 0;
7542
7543
7544 for (int chr = 0; chr < map.n_chr; ++chr) {
7545 //RPACKINSERT R_CheckUserInterrupt();
7546
7547 switch (map.chrs[chr].type) {
7548 case GSC_LINKAGEGROUP_SIMPLE:
7549 for (int i = 0; i < map.chrs[chr].map.simple.n_markers; ++i) {
7550 p1match = gsc_has_same_alleles(parent1, offspring, i);
7551 p2match = gsc_has_same_alleles(parent2, offspring, i);
7552 if (p1match && !p2match) {
7553 origins[map.chrs[chr].map.simple.first_marker_index + i] = p1num;
7554 previous = p1num;
7555 } else if (p2match && !p1match) {
7556 origins[map.chrs[chr].map.simple.first_marker_index + i] = p2num;
7557 previous = p2num;
7558 } else {
7559 if (certain) {
7560 origins[map.chrs[chr].map.simple.first_marker_index + i] = 0;
7561 } else {
7562 origins[map.chrs[chr].map.simple.first_marker_index + i] = previous;
7563 }
7564 }
7565 }
7566 break;
7567
7568 case GSC_LINKAGEGROUP_REORDER:
7569 for (int i = 0; i < map.chrs[chr].map.reorder.n_markers; ++i) {
7570 p1match = gsc_has_same_alleles(parent1, offspring, i);
7571 p2match = gsc_has_same_alleles(parent2, offspring, i);
7572 if (p1match && !p2match) {
7573 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = p1num;
7574 previous = p1num;
7575 } else if (p2match && !p1match) {
7576 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = p2num;
7577 previous = p2num;
7578 } else {
7579 if (certain) {
7580 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7581 } else {
7582 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = previous;
7583 }
7584 }
7585 }
7586 break;
7587 }
7588
7589 }
7590 return origins;
7591}
7592
7633int* gsc_calculate_min_recombinations_fwn(gsc_SimData* d, gsc_MapID mapid, char* parent1, unsigned int p1num, char* parent2,
7634 unsigned int p2num, char* offspring, int window_size, int certain) {
7635 if (d->genome.n_maps < 1) {
7636 fprintf(stderr,"Need at least one recombination map loaded to estimate recombinations\n");
7637 return NULL;
7638 }
7639 int mapix = 0;
7640 if (mapid.id != NO_MAP.id) { mapix = gsc_get_index_of_map(d, mapid); }
7641 if (mapix >= d->genome.n_maps) {
7642 fprintf(stderr,"We don't have that recombination maps loaded\n");
7643 return NULL;
7644 }
7645 gsc_RecombinationMap map = d->genome.maps[mapix];
7646
7647
7648 int* origins = gsc_malloc_wrap(sizeof(int) * d->genome.n_markers,GSC_TRUE);
7649 memset(origins,0,sizeof(*origins)*d->genome.n_markers);
7650 int p1match, p2match;
7651 int previous = 0, window_range = (window_size - 1)/2, i;
7652
7653 for (int chr = 0; chr < map.n_chr; ++chr) {
7654 //RPACKINSERT R_CheckUserInterrupt();
7655
7656 switch (map.chrs[chr].type) {
7657 case GSC_LINKAGEGROUP_SIMPLE:
7658 for (i = 0; i < window_range; ++i) {
7659 origins[map.chrs[chr].map.simple.first_marker_index + i] = 0;
7660 }
7661 for (; i < map.chrs[chr].map.simple.n_markers - window_range; ++i) {
7662 p1match = gsc_has_same_alleles_window(parent1, offspring, i, window_size);
7663 p2match = gsc_has_same_alleles_window(parent2, offspring, i, window_size);
7664 if (p1match && !p2match) {
7665 origins[map.chrs[chr].map.simple.first_marker_index + i] = p1num;
7666 previous = p1num;
7667 } else if (p2match && !p1match) {
7668 origins[map.chrs[chr].map.simple.first_marker_index + i] = p2num;
7669 previous = p2num;
7670 } else {
7671 if (certain) {
7672 origins[map.chrs[chr].map.simple.first_marker_index + i] = 0;
7673 } else {
7674 origins[map.chrs[chr].map.simple.first_marker_index + i] = previous;
7675 }
7676 }
7677 }
7678 for (; i < map.chrs[chr].map.simple.n_markers; ++i) {
7679 origins[map.chrs[chr].map.simple.first_marker_index + i] = 0;
7680 }
7681 break;
7682
7683 case GSC_LINKAGEGROUP_REORDER:
7684 for (i = 0; i < window_range; ++i) {
7685 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7686 }
7687 for (; i < map.chrs[chr].map.reorder.n_markers - window_range; ++i) {
7688 p1match = gsc_has_same_alleles_window(parent1, offspring, i, window_size);
7689 p2match = gsc_has_same_alleles_window(parent2, offspring, i, window_size);
7690 if (p1match && !p2match) {
7691 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = p1num;
7692 previous = p1num;
7693 } else if (p2match && !p1match) {
7694 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = p2num;
7695 previous = p2num;
7696 } else {
7697 if (certain) {
7698 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7699 } else {
7700 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = previous;
7701 }
7702 }
7703 }
7704 for (; i < map.chrs[chr].map.reorder.n_markers; ++i) {
7705 origins[map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7706 }
7707 break;
7708 }
7709
7710 }
7711 return origins;
7712}
7713
7750int gsc_calculate_recombinations_from_file(gsc_SimData* d, const char* input_file, const char* output_file,
7751 int window_len, int certain) {
7752 struct gsc_TableSize t = gsc_get_file_dimensions(input_file, '\t');
7753 //open file
7754 FILE* fp;
7755 if ((fp = fopen(input_file, "r")) == NULL) {
7756 fprintf(stderr, "Failed to open file %s.\n", input_file); exit(1);
7757 }
7758 FILE* fpo;
7759 if ((fpo = fopen(output_file, "w")) == NULL) {
7760 fprintf(stderr, "Failed to open file %s.\n", output_file); exit(1);
7761 }
7762
7763 // print header.
7764 for (int j = 0; j < d->genome.n_markers; ++j) {
7765 fprintf(fpo, "\t%s", d->genome.marker_names[j]);
7766 }
7767
7768 int combin_i[3];
7769 char* combin_genes[3];
7770 char buffer[3][50];
7771 int* r;
7772 // for each row in file
7773 for (int i = 0; i < t.num_rows; ++i) {
7774 // load the four grandparents
7775 fscanf(fp, "%s %s %s \n", buffer[0], buffer[1], buffer[2]);
7776 combin_i[0] = gsc_get_index_of_name(d->m, buffer[0]);
7777 combin_i[1] = gsc_get_index_of_name(d->m, buffer[1]);
7778 combin_i[2] = gsc_get_index_of_name(d->m, buffer[2]);
7779 if (combin_i[0] < 0 || combin_i[1] < 0 || combin_i[2] < 0) {
7780 fprintf(stderr, "Genotypes at file %s line %lu could not be found\n", input_file, (long unsigned int) i);
7781 continue;
7782 }
7783 combin_genes[0] = gsc_get_genes_of_index(d->m, combin_i[0]);
7784 combin_genes[1] = gsc_get_genes_of_index(d->m, combin_i[1]);
7785 combin_genes[2] = gsc_get_genes_of_index(d->m, combin_i[2]);
7786
7787 if (window_len == 1) {
7788 r = gsc_calculate_min_recombinations_fw1(d, NO_MAP, combin_genes[1],
7789 gsc_get_id_of_index(d->m, combin_i[1]).id, combin_genes[2],
7790 gsc_get_id_of_index(d->m, combin_i[2]).id, combin_genes[0], certain);
7791 } else {
7792 r = gsc_calculate_min_recombinations_fwn(d, NO_MAP, combin_genes[1],
7793 gsc_get_id_of_index(d->m, combin_i[1]).id, combin_genes[2],
7794 gsc_get_id_of_index(d->m, combin_i[2]).id, combin_genes[0], window_len, certain);
7795 }
7796 fprintf(fpo, "\n%s", buffer[0]);
7797 for (int j = 0; j < d->genome.n_markers; ++j) {
7798 fprintf(fpo, "\t%d", r[j]);
7799 }
7800 GSC_FREE(r);
7801 }
7802
7803 fclose(fp);
7804 fwrite("\n", sizeof(char), 1, fpo);
7805 fflush(fpo);
7806 fclose(fpo);
7807 return 0;
7808}
7809
7810
7811/*--------------------------------Crossing-----------------------------------*/
7812
7841 const char* parent_genome,
7842 char* output,
7843 const GSC_ID_T map_index) {
     // Simulates one gamete from `parent_genome` under the recombination map at
     // `map_index`. For every marker, one of the parent's two alleles
     // (parent_genome[2*m] or parent_genome[2*m + 1]) is copied to output[2*m];
     // output[2*m + 1] is never written here, so a caller can interleave a
     // second gamete by passing `output + 1`.
     // Prints a message and leaves `output` untouched if the parent is NULL or
     // the map index is out of range.
7844 // assumes rand is already seeded
7845 if (parent_genome == NULL) {
7846 fprintf(stderr, "Could not generate this gamete: no parent provided\n");
7847 return;
7848 }
7849 if (map_index >= d->genome.n_maps) {
7850 fprintf(stderr, "Could not generate this gamete: invalid map provided\n");
7851 return;
7852 }
7853 gsc_RecombinationMap map = d->genome.maps[map_index];
7854
7855 // treat each chromosome separately.
     // One crossover-position buffer (initial capacity 100) is reused across
     // all chromosomes and stretched on demand.
7856 GSC_CREATE_BUFFER(crossover_where, double, 100);
7857 for (GSC_GENOLEN_T chr = 0; chr < d->genome.maps[map_index].n_chr; ++chr) {
7858
7859 // Task 1: How many crossovers
     // The count is drawn by gsc_randpoi from the linkage group's
     // expected_n_crossovers; an unrecognised group type is reported and
     // treated as having no crossovers.
7860 int num_crossovers;
7861 switch (map.chrs[chr].type) {
7862 case GSC_LINKAGEGROUP_SIMPLE:
7863 num_crossovers = gsc_randpoi(&d->rng, map.chrs[chr].map.simple.expected_n_crossovers);
7864 break;
7865 case GSC_LINKAGEGROUP_REORDER:
7866 num_crossovers = gsc_randpoi(&d->rng, map.chrs[chr].map.reorder.expected_n_crossovers);
7867 break;
7868 default:
7869 fprintf(stderr, "Linkage group type of linkage group with index %lu of map with index %lu is corrupted\n",
7870 (long unsigned int) chr, (long unsigned int) map_index);
7871 num_crossovers = 0;
7872 }
7873
7874 // Task 2: Find positions of all crossovers
     // Positions are uniform fractions in [0,1], then sorted ascending so they
     // can be consumed in marker order below. Presumably `dists` stores marker
     // positions on the same 0..1 scale; confirm against the map loader.
     // NOTE(review): positions come from the C library rand(), whereas the
     // crossover count and the starting haplotype use d->rng. Confirm that
     // mixing the two generators is intended; results will not be reproducible
     // from d->rng's seed alone.
7875 if (num_crossovers > crossover_wherecap) {
7876 GSC_STRETCH_BUFFER(crossover_where,num_crossovers);
7877 }
7878 for (int i = 0; i < num_crossovers; ++i) {
7879 crossover_where[i] = ((double)rand() / (double)RAND_MAX);
7880 }
7881 if (num_crossovers > 1) {
7882 qsort(crossover_where, num_crossovers, sizeof(double), gsc_helper_ascending_double_comparer);
7883 }
7884
7885 // Task 3: Read off the gamete that those crossovers produce.
7886 int which = rnd_pcg_range(&d->rng,0,1); // if this is 0, we start with the left haplotype
7887 int up_to_crossover = 0;
7888 switch (map.chrs[chr].type) {
7889 case GSC_LINKAGEGROUP_SIMPLE:
     // SIMPLE groups: markers are contiguous in the genome, starting at
     // first_marker_index.
7890 for (GSC_GENOLEN_T i = 0; i < map.chrs[chr].map.simple.n_markers; ++i) {
7891 // is it time to invert which parent haplotype we're reading?
     // Each crossover positioned before this marker flips the haplotype once;
     // several crossovers between two adjacent markers may cancel out.
7892 while (up_to_crossover < num_crossovers &&
7893 map.chrs[chr].map.simple.dists[i] > crossover_where[up_to_crossover]) {
7894 which = 1 - which;
7895 up_to_crossover++;
7896 }
7897 output[2*(i + map.chrs[chr].map.simple.first_marker_index)] =
7898 parent_genome[2*(i + map.chrs[chr].map.simple.first_marker_index) + which];
7899 }
7900 break;
7901 case GSC_LINKAGEGROUP_REORDER:
     // REORDER groups: marker_indexes[i] gives the genome position of the
     // i-th marker along this linkage group.
7902 for (GSC_GENOLEN_T i = 0; i < map.chrs[chr].map.reorder.n_markers; ++i) {
7903 // is it time to invert which parent haplotype we're reading?
7904 while (up_to_crossover < num_crossovers &&
7905 map.chrs[chr].map.reorder.dists[i] > crossover_where[up_to_crossover]) {
7906 which = 1 - which;
7907 up_to_crossover++;
7908 }
7909 output[2*map.chrs[chr].map.reorder.marker_indexes[i]] =
7910 parent_genome[2*map.chrs[chr].map.reorder.marker_indexes[i] + which];
7911 }
7912 break;
7913 default:
7914 break;
7915 }
7916 }
7917 GSC_DELETE_BUFFER(crossover_where);
7918}
7919
7940 const char* parent_genome,
7941 char* output,
7942 const GSC_ID_T map_index) {
     // Simulates one gamete from `parent_genome` under the recombination map at
     // `map_index` and stores it twice: for every marker m, the chosen parental
     // allele is written to both output[2*m] and output[2*m + 1].
     // Prints a message and leaves `output` untouched if the parent is NULL or
     // the map index is out of range.
7943 /* For cache reasons it'll be better to copy-paste gsc_generate_gamete with
7944 * one extra line added to the inner loop, than to generate a single gamete
7945 * and then scan over `output` again to copy it. */
7946
7947 // assumes rand is already seeded
7948 if (parent_genome == NULL) {
7949 fprintf(stderr, "Could not make this doubled haploid\n");
7950 return;
7951 }
7952 if (map_index >= d->genome.n_maps) {
7953 fprintf(stderr, "Could not generate this gamete: invalid map provided\n");
7954 return;
7955 }
7956 gsc_RecombinationMap map = d->genome.maps[map_index];
7957
7958 // treat each chromosome separately.
     // One crossover-position buffer (initial capacity 100) is reused across
     // all chromosomes and stretched on demand.
7959 GSC_CREATE_BUFFER(crossover_where, double, 100);
7960 for (GSC_GENOLEN_T chr = 0; chr < d->genome.maps[map_index].n_chr; ++chr) {
7961
7962 // Task 1: How many crossovers
     // The count is drawn by gsc_randpoi from the linkage group's
     // expected_n_crossovers; an unrecognised group type is reported and
     // treated as having no crossovers.
7963 int num_crossovers;
7964 switch (map.chrs[chr].type) {
7965 case GSC_LINKAGEGROUP_SIMPLE:
7966 num_crossovers = gsc_randpoi(&d->rng, map.chrs[chr].map.simple.expected_n_crossovers);
7967 break;
7968 case GSC_LINKAGEGROUP_REORDER:
7969 num_crossovers = gsc_randpoi(&d->rng, map.chrs[chr].map.reorder.expected_n_crossovers);
7970 break;
7971 default:
7972 fprintf(stderr, "Linkage group type of group with index %lu of map with index %lu is corrupted\n",
7973 (long unsigned int) chr, (long unsigned int) map_index);
7974 num_crossovers = 0;
7975 }
7976
7977 // Task 2: Find positions of all crossovers
     // Positions are uniform fractions in [0,1], sorted ascending so they can
     // be consumed in marker order below.
     // NOTE(review): positions come from the C library rand(), whereas the
     // crossover count and the starting haplotype use d->rng. Confirm that
     // mixing the two generators is intended.
7978 if (num_crossovers > crossover_wherecap) {
7979 GSC_STRETCH_BUFFER(crossover_where,num_crossovers);
7980 }
7981 for (int i = 0; i < num_crossovers; ++i) {
7982 crossover_where[i] = ((double)rand() / (double)RAND_MAX);
7983 }
7984 if (num_crossovers > 1) {
7985 qsort(crossover_where, num_crossovers, sizeof(double), gsc_helper_ascending_double_comparer);
7986 }
7987
7988 // Task 3: Read off the gamete that those crossovers produce.
7989 int which = rnd_pcg_range(&d->rng,0,1); // if this is 0, we start with the left haplotype
7990 int up_to_crossover = 0;
7991 switch (map.chrs[chr].type) {
7992 case GSC_LINKAGEGROUP_SIMPLE:
     // SIMPLE groups: markers are contiguous in the genome, starting at
     // first_marker_index.
7993 for (GSC_GENOLEN_T i = 0; i < map.chrs[chr].map.simple.n_markers; ++i) {
7994 // is it time to invert which parent haplotype we're reading?
     // Each crossover positioned before this marker flips the haplotype once.
7995 while (up_to_crossover < num_crossovers &&
7996 map.chrs[chr].map.simple.dists[i] > crossover_where[up_to_crossover]) {
7997 which = 1 - which;
7998 up_to_crossover++;
7999 }
8000 GSC_GENOLEN_T pos = i + map.chrs[chr].map.simple.first_marker_index;
8001 output[2*pos] = parent_genome[2*pos + which];
8002 output[2*pos + 1] = output[2*pos]; // haploid doubling happens here
8003 }
8004 break;
8005 case GSC_LINKAGEGROUP_REORDER:
     // REORDER groups: marker_indexes[i] gives the genome position of the
     // i-th marker along this linkage group.
8006 for (GSC_GENOLEN_T i = 0; i < map.chrs[chr].map.reorder.n_markers; ++i) {
8007 // is it time to invert which parent haplotype we're reading?
8008 while (up_to_crossover < num_crossovers &&
8009 map.chrs[chr].map.reorder.dists[i] > crossover_where[up_to_crossover]) {
8010 which = 1 - which;
8011 up_to_crossover++;
8012 }
8013 GSC_GENOLEN_T pos = map.chrs[chr].map.reorder.marker_indexes[i];
8014 output[2*pos] = parent_genome[2*pos + which];
8015 output[2*pos + 1] = output[2*pos]; // haploid doubling happens here
8016 }
8017 break;
8018 default:
8019 break;
8020 }
8021 }
8022 GSC_DELETE_BUFFER(crossover_where);
8023}
8024
8025
8038 const char* parent_genome,
8039 char* output) {
     // Copies both alleles of every marker (2 * d->genome.n_markers chars in
     // total) from parent_genome into output, unchanged. No NULL checks are
     // made on either array.
8040 for (GSC_GENOLEN_T j = 0; j < d->genome.n_markers; ++j) {
8041 output[2*j] = parent_genome[2*j];
8042 output[2*j + 1] = parent_genome[2*j + 1];
8043 }
8044 return;
8045}
8046
     // Opens "<prefix>-pedigree.txt" for writing, using "out" as the prefix when
     // g.filename_prefix is NULL. Returns the FILE* (NULL if fopen fails or the
     // file is not wanted).
8049 FILE* fp = NULL;
8051 char tmpname_p[NAME_LENGTH];
     // NOTE(review): strncpy does not NUL-terminate when the prefix is
     // NAME_LENGTH-13 characters or longer, so the strcat below would start
     // from an unterminated buffer. The suffix is 13 characters plus its
     // terminator, so the space reserved also looks one byte short.
8052 if (g.filename_prefix != NULL) {
8053 strncpy(tmpname_p, g.filename_prefix,
8054 sizeof(char)*(NAME_LENGTH-13));
8055 } else {
8056 strcpy(tmpname_p, "out");
8057 }
8058 strcat(tmpname_p, "-pedigree.txt");
8059 fp = fopen(tmpname_p, "w");
8060 }
8061 return fp;
8062}
8069 const gsc_GenOptions g,
8070 GSC_ID_T* effIndexp) {
     // Opens "<prefix>-bv.txt" for writing when *effIndexp refers to a usable
     // effect set, using "out" as the prefix when g.filename_prefix is NULL.
     // Returns the FILE* (NULL if nothing was opened).
8071 FILE* fe = NULL;
8074 if (*effIndexp != GSC_NA_IDX) {
8075 char tmpname_b[NAME_LENGTH];
     // NOTE(review): strncpy does not NUL-terminate when the prefix is
     // NAME_LENGTH-7 characters or longer, so the strcat below would start
     // from an unterminated buffer. The suffix is 7 characters plus its
     // terminator, so the space reserved also looks one byte short.
8076 if (g.filename_prefix != NULL) {
8077 strncpy(tmpname_b, g.filename_prefix,
8078 sizeof(char)*(NAME_LENGTH-7));
8079 } else {
8080 strcpy(tmpname_b, "out");
8081 }
8082 strcat(tmpname_b, "-bv.txt");
8083 fe = fopen(tmpname_b, "w");
8084 }
8085 }
8086 return fe;
8087}
8090 const gsc_GenOptions g) {
     // Opens "<prefix>-genotype.txt" for writing, using "out" as the prefix when
     // g.filename_prefix is NULL. Returns the FILE* (NULL if fopen fails or the
     // file is not wanted).
8091 FILE* fg = NULL;
8093 char tmpname_g[NAME_LENGTH];
     // NOTE(review): strncpy does not NUL-terminate when the prefix is
     // NAME_LENGTH-13 characters or longer, so the strcat below would start
     // from an unterminated buffer. The suffix is 13 characters plus its
     // terminator, so the space reserved also looks one byte short.
8094 if (g.filename_prefix != NULL) {
8095 strncpy(tmpname_g, g.filename_prefix,
8096 sizeof(char)*(NAME_LENGTH-13));
8097 } else {
8098 strcpy(tmpname_g, "out");
8099 }
8100 strcat(tmpname_g, "-genotype.txt");
8101 fg = fopen(tmpname_g, "w");
     // NOTE(review): fg is not checked for NULL before the header row is written.
8102 // Save genetic markers as header row.
8104 }
8105 return fg;
8106}
8107
8114 gsc_SimData* d,
8115 gsc_AlleleMatrix* tosave) {
     // Does nothing unless a pedigree output file is open.
8116 if (fp) {
8120 }
8121}
8129 gsc_MarkerEffects* effMatrices,
8130 GSC_ID_T effIndex,
8131 gsc_AlleleMatrix* tosave) {
     // Writes via gsc_save_utility_bvs only when a file is open AND an effect
     // set was selected; the effect set used is the effIndex-th entry of
     // effMatrices.
8132 if (fe && effIndex != GSC_NA_IDX) {
8134 gsc_save_utility_bvs(fe, &it, effMatrices + effIndex);
8136 }
8137}
     // Does nothing unless a genotype output file is open. All
     // tosave->n_markers markers are passed to gsc_save_utility_genotypes.
8144 if (fg) {
8146 gsc_save_utility_genotypes(fg, &it, tosave->n_markers, NULL, GSC_FALSE);
8148 }
8149}
8150
8155 gsc_SimData* d,
8156 const gsc_GenOptions g) {
     // Applies the naming and ID options of `g` to the genotypes held in `am`.
8157 if (g.will_name_offspring) {
8159 }
     // IDs are handed out sequentially: the simulation-wide counter
     // d->current_id is advanced first and then stored, so each of the
     // am->n_genotypes entries receives the next unused value.
8160 if (g.will_allocate_ids) {
8161 for (GSC_LOCALX_T j = 0; j < am->n_genotypes; ++j) {
8162 ++(d->current_id.id);
8163 am->ids[j] = d->current_id;
8164 }
8165 }
8166}
8167
8168
8170 struct {
8177 struct {
8188 struct {
8196 struct {
8198 unsigned int n_gens_selfing;
8200 struct {
8201 GSC_ID_T map_index; // needs to be in first spot to match selfing.map_index
8203 struct {
8207};
8208
8209
8245 const gsc_GenOptions g,
8246 void* parentIterator,
8247 union gsc_datastore_make_genotypes* datastore,
8248 int (*parentChooser)(void*,
8250 GSC_GLOBALX_T*,
8251 gsc_ParentChoice[static 2]),
8252 void (*offspringGenerator)(gsc_SimData*,
8254 gsc_ParentChoice[static 2],
8256 ) {
     // Shared driver for offspring-producing operations. It repeatedly asks
     // `parentChooser` for a pair of parents and, for each pair, calls
     // `offspringGenerator` g.family_size times. Results accumulate in the
     // `offspring` buffer, which is flushed to the output files (and linked
     // into the simulation, if requested) every CONTIG_WIDTH genotypes.
     // Returns the new group's number, or GSC_NO_GROUP if the arguments are
     // invalid, no pair was chosen, or results are not kept in the simulation.
8257 if (g.family_size < 1 || d == NULL ||
8258 parentChooser == NULL || offspringGenerator == NULL) {
8259 return GSC_NO_GROUP;
8260 }
8261
8262 // create the buffer we'll use to save the output crosses before they're printed.
     // fullness: slots used in the current buffer. counter: parent pairs chosen so far.
8264 GSC_LOCALX_T fullness = 0;
8265 GSC_GLOBALX_T counter = 0;
8268
8269 gsc_AlleleMatrix* last = NULL;
8270 gsc_GroupNum output_group = GSC_NO_GROUP;
     // NOTE(review): the walk below dereferences d->m, so it assumes the
     // simulation already holds at least one AlleleMatrix. Confirm callers
     // guarantee this.
8271 if (g.will_save_to_simdata) {
8272 last = d->m; // for saving to simdata
8273 while (last->next != NULL) {
8274 last = last->next;
8275 }
8276 output_group = gsc_get_new_group_num( d );
8277 }
8278
8279 // open the output files, if applicable
8281 GSC_ID_T effIndex = GSC_NA_IDX;
8282 FILE* fe = gsc_helper_genoptions_save_bvs_setup(d,g,&effIndex);
8284
8285 //RPACKINSERT GetRNGstate();
8286 // loop through each combination
     // The chooser is given `counter` BEFORE it is incremented, i.e. it sees
     // the number of pairs already chosen. A false return ends the loop.
8287 while (parentChooser(parentIterator, datastore, &counter, parents)) {
8288 ++counter;
8289 for (GSC_GLOBALX_T f = 0; f < g.family_size; ++f, ++fullness) {
8290 //RPACKINSERT R_CheckUserInterrupt();
8291
8292 // when offspring buffer is full, save these outcomes to the file.
8293 if (fullness >= CONTIG_WIDTH) {
8294 offspring->n_genotypes = CONTIG_WIDTH;
8296 gsc_helper_genoptions_save_pedigrees(fp, d, offspring);
8297 gsc_helper_genoptions_save_bvs(fe, d->e, effIndex, offspring);
8299
     // When results are kept, the full buffer is appended to the simulation's
     // list of matrices.
8300 if (g.will_save_to_simdata) {
8301 last->next = offspring;
8302 last = last->next;
8304 }
8305 fullness = 0; //reset the count and start refilling the matrix
8306 }
8307
8308 // do the cross.
     // The generator writes the new genotype into slot `fullness` of the
     // buffer; group membership and (optionally) both parents' IDs are then
     // recorded for that slot.
8309 gsc_GenoLocation offspringPos = { .localAM=offspring, .localPos=fullness };
8310 offspringGenerator(d, datastore, parents, offspringPos);
8311 offspring->groups[fullness] = output_group;
8312 if (g.will_track_pedigree) {
8313 offspring->pedigrees[0][fullness] = gsc_get_id(parents[0].loc);
8314 offspring->pedigrees[1][fullness] = gsc_get_id(parents[1].loc);
8315 }
8316 }
8317 }
8318 //RPACKINSERT PutRNGstate();
8319
     // Flush the final, possibly partially filled, buffer.
8320 offspring->n_genotypes = fullness;
8322 gsc_helper_genoptions_save_pedigrees(fp, d, offspring);
8323 gsc_helper_genoptions_save_bvs(fe, d->e, effIndex, offspring);
8325
8326 if (fp) fclose(fp);
8327 if (fe) fclose(fe);
8328 if (fg) fclose(fg);
8329
     // The last buffer is linked into the simulation only if at least one pair
     // was chosen and results are being kept; otherwise it is deleted.
8330 if (counter > 0 && g.will_save_to_simdata) {
8331 last->next = offspring;
8332 d->n_groups++;
8334 return output_group;
8335 } else {
8336 gsc_delete_allele_matrix( offspring );
8337 return GSC_NO_GROUP;
8338 }
8339}
8340
/* Parent chooser for random crossing within a single group.
 *
 * While fewer than datastore->rand.n_crosses pairs have been chosen (and, when
 * a usage cap is set, fewer than cap * group_size), draws two parent indexes,
 * looks up their locations and fills `parents`. `*counter` is read as the
 * number of pairs chosen so far and is not modified here.
 *
 * Returns nonzero if both parent locations are valid; GSC_FALSE once the
 * requested number of crosses has been reached or a lookup failed.
 */
8350static int gsc_helper_parentchooser_cross_randomly(void* parentIterator,
8351 union gsc_datastore_make_genotypes* datastore,
8352 GSC_GLOBALX_T* counter,
8353 gsc_ParentChoice parents[static 2]) {
     // `it` is used only to reach the simulation (it[0].d); the lookups below
     // pass parentIterator directly.
8354 gsc_RandomAccessIterator* it = (gsc_RandomAccessIterator*) parentIterator;
8355
8356 GSC_GLOBALX_T parentixs[2] = { 0 };
8357
8358 if (*counter < datastore->rand.n_crosses &&
8359 (datastore->rand.cap == 0 || (*counter) < datastore->rand.cap * datastore->rand.group_size)) {
8360 // get parents, randomly. Must not be identical or already been used too many times.
8361 parentixs[0] = gsc_randomdraw_replacementrules(it[0].d,
8362 datastore->rand.group_size,
8363 datastore->rand.cap,
8364 datastore->rand.uses,
     // The second draw passes the first index as the one to avoid, so the two
     // parents differ.
8366 parentixs[1] = gsc_randomdraw_replacementrules(it[0].d,
8367 datastore->rand.group_size,
8368 datastore->rand.cap,
8369 datastore->rand.uses,
8370 parentixs[0]);
8371
     // Usage counts are tracked only when a cap is in force (otherwise `uses`
     // is not expected to be allocated).
8372 if (datastore->rand.cap > 0) {
8373 datastore->rand.uses[parentixs[0]] += 1;
8374 datastore->rand.uses[parentixs[1]] += 1;
8375 }
8376
8377 // Neither of these should fail, if nparents is good.
8378 parents[0].loc = gsc_next_get_nth(parentIterator, parentixs[0]);
8379 parents[1].loc = gsc_next_get_nth(parentIterator, parentixs[1]);
8380 // Reiterate map. Might save us a read to not bother checking their values first.
8381 parents[0].mapindex = datastore->rand.map_index;
8382 parents[1].mapindex = datastore->rand.map_index;
8383 // This will cut short gsc_scaffold_make_new_genotypes execution if either parent is invalid.
8384 return GSC_IS_VALID_LOCATION(parents[0].loc) && GSC_IS_VALID_LOCATION(parents[1].loc);
8385 } else {
8386 return GSC_FALSE;
8387 }
8388}
8389
8403 union gsc_datastore_make_genotypes* datastore,
8404 gsc_ParentChoice parents[static 2],
8405 gsc_GenoLocation putHere) {
8406 // (silly name)
     // One gamete per parent, each generated under that parent's own map.
     // Parent 0's gamete is written from the start of the offspring's allele
     // array and parent 1's from offset +1; since gsc_generate_gamete writes
     // every second position, the two gametes interleave into one genotype.
     // `datastore` is not used here.
8407 gsc_generate_gamete(d, gsc_get_alleles(parents[0].loc), (gsc_get_alleles(putHere) ), parents[0].mapindex);
8408 gsc_generate_gamete(d, gsc_get_alleles(parents[1].loc), (gsc_get_alleles(putHere)+1), parents[1].mapindex);
8409}
8410
8417 const gsc_GroupNum from_group,
8418 const GSC_GLOBALX_T n_crosses,
8419 const GSC_GLOBALX_T cap) {
     // Validates the inputs of a random-crossing request. Returns the number
     // of members of from_group when the request is feasible, or 0 after
     // printing the reason to stderr.
8420 GSC_GLOBALX_T g_size = gsc_get_group_size(d, from_group); // might be a better way to do this using the iterator.
8421 if (g_size == 0) {
8422 fprintf(stderr,"Group %lu does not exist\n", (long unsigned int) from_group.num);
8423 return 0;
8424 }
8425
8426 if (n_crosses < 1) {
8427 fprintf(stderr,"Invalid n_crosses value provided: n_crosses must be greater than 0\n");
8428 return 0;
8429 }
8430
     // NOTE(review): this test can only fire if GSC_GLOBALX_T is a signed
     // type. Confirm against its definition.
8431 if (cap < 0) {
8432 fprintf(stderr,"Invalid cap value provided: cap can't be negative\n");
8433 return 0;
8434 }
     // cap == 0 means "no cap". Otherwise cap*g_size is the total number of
     // parent uses available and must cover the number requested.
8435 if (cap > 0 && cap*g_size < n_crosses) {
8436 fprintf(stderr,"Invalid cap value provided: cap of %lu uses on %lu parents too small to make %lu crosses\n",
8437 (long unsigned int) cap, (long unsigned int) g_size, (long unsigned int) n_crosses);
8438 return 0;
8439 }
8440
8441 return g_size;
8442}
8443
8470 const gsc_GroupNum from_group,
8471 const GSC_GLOBALX_T n_crosses,
8472 const GSC_GLOBALX_T cap,
8473 const gsc_MapID which_map,
8474 const gsc_GenOptions g) {
     // Performs n_crosses random crosses between distinct members of
     // from_group and returns the group number of the offspring, or
     // GSC_NO_GROUP if the request is invalid.
     // Each cross consumes two parent uses, hence n_crosses*2 is what gets
     // checked against the cap.
8475 GSC_GLOBALX_T g_size = gsc_helper_random_cross_checks(d, from_group, n_crosses*2, cap);
8476 if (g_size == 0) {
8477 return GSC_NO_GROUP;
8478 } else if (g_size == 1) {
8479 fprintf(stderr,"Group %lu must contain multiple individuals to be able to perform random crossing\n",
8480 (long unsigned int) from_group.num);
8481 return GSC_NO_GROUP;
8482 }
8483 if (d->genome.n_maps < 1) {
8484 fprintf(stderr,"Crossing requires at least one recombination map loaded\n");
8485 return GSC_NO_GROUP;
8486 }
8487
8488 union gsc_datastore_make_genotypes paramstore = { 0 };
8489 paramstore.rand.n_crosses = n_crosses;
8490 paramstore.rand.group_size = g_size;
8491 paramstore.rand.map_index = 0;
8492 paramstore.rand.cap = cap;
     // A per-member usage counter array is only needed when a cap is in force.
8493 if (cap > 0) {
8494 paramstore.rand.uses = gsc_malloc_wrap(sizeof(*paramstore.rand.uses)*g_size,GSC_TRUE);
8495 memset(paramstore.rand.uses, 0, sizeof(*paramstore.rand.uses)*g_size);
8496 } else {
8497 paramstore.rand.uses = NULL;
8498 }
8499
     // NO_MAP keeps the default of map index 0 (the first loaded map).
8500 if (which_map.id != NO_MAP.id) {
8501 paramstore.rand.map_index = gsc_get_index_of_map(d, which_map);
8502 }
     // NOTE(review): when cap > 0, this early return leaves
     // paramstore.rand.uses allocated (it is only freed on the normal path at
     // the end of the function).
8503 if (paramstore.rand.map_index == GSC_NA_IDX) {
8504 fprintf(stderr,"Could not find recombination map with identifier %lu\n", (long unsigned int) which_map.id);
8505 return GSC_NO_GROUP;
8506 }
8507
8508 RandomAccessIterator parentit = gsc_create_randomaccess_iter( d, from_group);
8509
8510 gsc_GroupNum offspring = gsc_scaffold_make_new_genotypes(d, g, (void*) &parentit,
8511 &paramstore,
8514
8516 GSC_FREE(paramstore.rand.uses);
8517 return offspring;
8518}
8519
8543 GSC_GLOBALX_T max,
8545 GSC_GLOBALX_T* member_uses,
8546 GSC_GLOBALX_T noCollision) {
     // Draws a random index in [0, max-1] that differs from `noCollision` and,
     // when cap > 0, whose entry in member_uses is still below cap.
     // Returns GSC_NA_GLOBALX when no draw is possible: an empty range, or a
     // one-member range whose only index (0) is the excluded one.
8547 if (max < 1 || (max == 1 && noCollision == 0)) {
8548 return GSC_NA_GLOBALX;
8549 }
     // This is a warning only: the draw below still goes ahead.
8550 if (max > INT_MAX) {
8551 fprintf(stderr, "Drawing a random number with a max of %lu is not supported on the C version"
8552 "with the rnd library. If the max is greater than %d, probabilistic uniformity may be lost"
8553 "or an infinite loop may occur.", (long unsigned int) max, INT_MAX);
8554 }
8555
     // Rejection sampling: redraw until an acceptable index comes up.
     // NOTE(review): neither loop terminates if no index satisfies the
     // conditions (e.g. every member other than noCollision has reached the
     // cap); callers appear to be expected to rule that out beforehand.
8556 GSC_GLOBALX_T parentix = 0;
8557 if (cap > 0) { // n uses of each parent is capped at a number cap.
8558 do {
8559 parentix = rnd_pcg_range(&d->rng,0,max - 1);
8560 } while (parentix == noCollision || member_uses[parentix] >= cap);
8561 } else { // no cap on usage of each parent.
8562 do {
8563 parentix = rnd_pcg_range(&d->rng,0,max - 1);
8564 } while (parentix == noCollision);
8565 }
8566 return parentix;
8567}
8568
8581 union gsc_datastore_make_genotypes* datastore,
8582 GSC_GLOBALX_T* counter,
8583 gsc_ParentChoice parents[static 2]) {
     // Parent chooser for random crossing between two groups: the first parent
     // is drawn from the group behind it[0], the second from the group behind
     // it[1], each with its own size, usage cap, usage counters and map.
     // Returns nonzero if both parent locations are valid; GSC_FALSE once
     // n_crosses pairs have been chosen, either cap budget is exhausted, or a
     // lookup failed. `*counter` is read but not modified here.
8584 // caller function should guarantee that nparents is not 1. How would you make a nonselfed cross then?
     // parentIterator is treated as an array of (at least) two iterators.
8585 gsc_RandomAccessIterator* it = (gsc_RandomAccessIterator*) parentIterator;
8586 size_t parentixs[2] = { 0 };
8587
     // A cap of 0 means "no cap" for that group.
8588 if (*counter < datastore->rand_btwn.n_crosses &&
8589 (datastore->rand_btwn.cap1 == 0 || (*counter) < datastore->rand_btwn.cap1 * datastore->rand_btwn.group1_size) &&
8590 (datastore->rand_btwn.cap2 == 0 || (*counter) < datastore->rand_btwn.cap2 * datastore->rand_btwn.group2_size)) {
8591 // get parents, randomly. Must not be identical or already been used too many times.
8592 parentixs[0] = gsc_randomdraw_replacementrules(it[0].d,
8593 datastore->rand_btwn.group1_size,
8594 datastore->rand_btwn.cap1,
8595 datastore->rand_btwn.uses1,
8597 parentixs[1] = gsc_randomdraw_replacementrules(it[1].d,
8598 datastore->rand_btwn.group2_size,
8599 datastore->rand_btwn.cap2,
8600 datastore->rand_btwn.uses2,
8602
     // Usage counts are tracked per group, and only for a group with a cap.
8603 if (datastore->rand_btwn.cap1 > 0) {
8604 datastore->rand_btwn.uses1[parentixs[0]] += 1;
8605 }
8606 if (datastore->rand_btwn.cap2 > 0) {
8607 datastore->rand_btwn.uses2[parentixs[1]] += 1;
8608 }
8609
8610 parents[0].loc = gsc_next_get_nth(it+0, parentixs[0]);
8611 parents[1].loc = gsc_next_get_nth(it+1, parentixs[1]);
8612 parents[0].mapindex = datastore->rand_btwn.map1_index;
8613 parents[1].mapindex = datastore->rand_btwn.map2_index;
8614 return GSC_IS_VALID_LOCATION(parents[0].loc) && GSC_IS_VALID_LOCATION(parents[1].loc);
8615 }
8616 return GSC_FALSE;
8617}
8618
8656 const gsc_GroupNum group1,
8657 const gsc_GroupNum group2,
8659 const GSC_GLOBALX_T cap1,
8660 const GSC_GLOBALX_T cap2,
8661 const gsc_MapID map1,
8662 const gsc_MapID map2,
8663 const gsc_GenOptions g) {
8666 if (group1_size == 0 || group2_size == 0) {
8667 return GSC_NO_GROUP;
8668 }
8669 if (d->genome.n_maps < 1) {
8670 fprintf(stderr,"Crossing requires at least one recombination map loaded\n");
8671 return GSC_NO_GROUP;
8672 }
8673
8674 union gsc_datastore_make_genotypes paramstore;
8675 paramstore.rand_btwn.n_crosses = n_crosses;
8676 paramstore.rand_btwn.group1_size = group1_size;
8677 paramstore.rand_btwn.group2_size = group2_size;
8678 paramstore.rand_btwn.map1_index = 0;
8679 paramstore.rand_btwn.map2_index = 0;
8680 paramstore.rand_btwn.cap1 = cap1;
8681 paramstore.rand_btwn.cap2 = cap2;
8682 if (cap1 > 0) {
8683 paramstore.rand_btwn.uses1 =
8684 gsc_malloc_wrap(sizeof(*paramstore.rand_btwn.uses1)*group1_size,GSC_TRUE);
8685 memset(paramstore.rand_btwn.uses1, 0, sizeof(*paramstore.rand_btwn.uses1)*group1_size);
8686 } else {
8687 paramstore.rand_btwn.uses1 = NULL;
8688 }
8689 if (cap2 > 0) {
8690 paramstore.rand_btwn.uses2 =
8691 gsc_malloc_wrap(sizeof(*paramstore.rand_btwn.uses2)*group2_size,GSC_TRUE);
8692 memset(paramstore.rand_btwn.uses2, 0, sizeof(*paramstore.rand_btwn.uses2)*group2_size);
8693 } else {
8694 paramstore.rand_btwn.uses2 = NULL;
8695 }
8696
8697 if (map1.id != NO_MAP.id) {
8698 paramstore.rand_btwn.map1_index = gsc_get_index_of_map(d, map1);
8699 }
8700 if (paramstore.rand_btwn.map1_index == GSC_NA_IDX) {
8701 fprintf(stderr,"Could not find recombination map with identifier %lu\n", (long unsigned int) map1.id);
8702 return GSC_NO_GROUP;
8703 }
8704 if (map2.id != NO_MAP.id) {
8705 paramstore.rand_btwn.map2_index = gsc_get_index_of_map(d, map2);
8706 }
8707 if (paramstore.rand_btwn.map2_index == GSC_NA_IDX) {
8708 fprintf(stderr,"Could not find recombination map with identifier %lu\n", (long unsigned int) map2.id);
8709 return GSC_NO_GROUP;
8710 }
8711
8712 gsc_RandomAccessIterator parentit[2] = { gsc_create_randomaccess_iter( d, group1 ),
8713 gsc_create_randomaccess_iter( d, group2 ) };
8714
8715 gsc_GroupNum offspring = gsc_scaffold_make_new_genotypes(d, g, (void*) parentit,
8716 &paramstore,
8719
8720 gsc_delete_randomaccess_iter(&parentit[0]);
8721 gsc_delete_randomaccess_iter(&parentit[1]);
8722 GSC_FREE(paramstore.rand_btwn.uses1);
8723 GSC_FREE(paramstore.rand_btwn.uses2);
8724 return offspring;
8725
8726}
8727
8737static int gsc_helper_parentchooser_cross_targeted(void* parentIterator,
8738 union gsc_datastore_make_genotypes* datastore,
8739 GSC_GLOBALX_T* counter,
8740 gsc_ParentChoice parents[static 2]) {
8741 gsc_RandomAccessIterator* it = (gsc_RandomAccessIterator*) parentIterator;
8742
8743 while (*counter < datastore->targeted.n_crosses) {
8744 if (datastore->targeted.first_parents[*counter] != GSC_NA_GLOBALX &&
8745 datastore->targeted.second_parents[*counter] != GSC_NA_GLOBALX) {
8746 // We only try to "get nth" if it seems like a potentially reasonable value
8747 parents[0].loc = gsc_next_get_nth(it, datastore->targeted.first_parents[*counter]);
8748 parents[1].loc = gsc_next_get_nth(it, datastore->targeted.second_parents[*counter]);
8749 parents[0].mapindex = datastore->targeted.map1_index;
8750 parents[1].mapindex = datastore->targeted.map2_index;
8751
8752 if (GSC_IS_VALID_LOCATION(parents[0].loc) && GSC_IS_VALID_LOCATION(parents[1].loc)) {
8753 return GSC_TRUE;
8754 }
8755 }
8756
8757 // If this was not a valid pair of parents, skip them and move on to the next pair.
8758 ++ datastore->targeted.bad_pairings;
8759 ++ (*counter);
8760 }
8761 return GSC_FALSE;
8762}
8763
8797 const size_t n_combinations,
8798 const GSC_GLOBALX_T* firstParents,
8799 const GSC_GLOBALX_T* secondParents,
8800 const gsc_MapID map1,
8801 const gsc_MapID map2,
8802 const gsc_GenOptions g) {
8803 if (n_combinations < 1) {
8804 fprintf(stderr,"Invalid n_combinations value provided: n_combinations must be greater than 0\n");
8805 return GSC_NO_GROUP;
8806 }
8807 if (d->genome.n_maps < 1) {
8808 fprintf(stderr,"Crossing requires at least one recombination map loaded\n");
8809 return GSC_NO_GROUP;
8810 }
8811
8812 union gsc_datastore_make_genotypes paramstore;
8813 paramstore.targeted.n_crosses = n_combinations;
8814 paramstore.targeted.bad_pairings = 0;
8815 paramstore.targeted.map1_index = 0;
8816 paramstore.targeted.map2_index = 0;
8817 // casting away const but is being used as readonly
8818 paramstore.targeted.first_parents = (GSC_GLOBALX_T*) firstParents;
8819 paramstore.targeted.second_parents = (GSC_GLOBALX_T*) secondParents;
8820
8821 if (map1.id != NO_MAP.id) {
8822 paramstore.targeted.map1_index = gsc_get_index_of_map(d, map1);
8823 }
8824 if (paramstore.targeted.map1_index == GSC_NA_IDX) {
8825 fprintf(stderr,"Could not find recombination map with identifier %lu\n", (long unsigned int) map1.id);
8826 return GSC_NO_GROUP;
8827 }
8828 if (map2.id != NO_MAP.id) {
8829 paramstore.targeted.map2_index = gsc_get_index_of_map(d, map2);
8830 }
8831 if (paramstore.targeted.map2_index == GSC_NA_IDX) {
8832 fprintf(stderr,"Could not find recombination map with identifier %lu\n", (long unsigned int) map2.id);
8833 return GSC_NO_GROUP;
8834 }
8835
8837
8838 gsc_GroupNum offspring = gsc_scaffold_make_new_genotypes(d, g, (void*) &parentit,
8839 &paramstore,
8842
8844 if (paramstore.targeted.bad_pairings > 0) {
8845 fprintf(stderr,"Targeted crossing failed for %lu out of the %lu requested pairings due to one or both genotype indexes being invalid\n", (long unsigned int) paramstore.targeted.bad_pairings, (long unsigned int) n_combinations);
8846 }
8847 if (n_combinations - paramstore.targeted.bad_pairings == 0) {
8848 return GSC_NO_GROUP;
8849 }
8850 return offspring;
8851}
8852
8862static int gsc_helper_parentchooser_selfing(void* parentIterator,
8863 union gsc_datastore_make_genotypes* datastore,
8864 GSC_GLOBALX_T* counter,
8865 gsc_ParentChoice parents[static 2]) {
8867
8868 parents[0].loc = gsc_next_forwards(it);
8869 parents[0].mapindex = datastore->selfing.map_index;
8870 parents[1] = parents[0];
8871
8872 return GSC_IS_VALID_LOCATION(parents[0].loc);
8873}
8874
8887 union gsc_datastore_make_genotypes* datastore,
8888 gsc_ParentChoice parents[static 2],
8889 gsc_GenoLocation putHere) {
8890 unsigned int n = datastore->selfing.n_gens_selfing;
8891
8892 // error checking parents are the same is not done.
8893 // error checking n >= 1 is not done.
8894
8895 char* tmpparent = gsc_get_alleles(parents[0].loc);
8896 GSC_ID_T map = parents[0].mapindex;
8897 GSC_CREATE_BUFFER(tmpchild,char,d->genome.n_markers<<1);
8898 char* output = gsc_get_alleles(putHere);
8899 int n_oddness = n % 2;
8900 for (unsigned int i = 0; i < n; ++i) {
8901 if (i % 2 == n_oddness) {
8902 gsc_generate_gamete(d, tmpparent, tmpchild, map);
8903 gsc_generate_gamete(d, tmpparent, tmpchild+1, map);
8904 tmpparent = tmpchild;
8905 } else {
8906 gsc_generate_gamete(d, tmpparent, output, map);
8907 gsc_generate_gamete(d, tmpparent, output+1, map);
8908 tmpparent = output;
8909 }
8910 }
8911 GSC_DELETE_BUFFER(tmpchild);
8912}
8913
8938 const unsigned int n,
8939 const gsc_GroupNum group,
8940 const gsc_MapID which_map,
8941 const gsc_GenOptions g) {
8942 /*int group_size = gsc_get_group_size( d, group);
8943 if (group_size < 1) {
8944 fprintf(stderr,"Group %d does not exist.\n", group.num);
8945 return GSC_NO_GROUP;
8946 }*/
8947 if (n < 1) {
8948 fprintf(stderr,"Invalid n value provided: Number of generations must be greater than 0\n");
8949 return GSC_NO_GROUP;
8950 }
8951 if (d->genome.n_maps == 0) {
8952 fprintf(stderr,"Selfing requires at least one recombination map loaded\n");
8953 return GSC_NO_GROUP;
8954 }
8955
8956 union gsc_datastore_make_genotypes paramstore;
8957 paramstore.selfing.map_index = 0;
8958 paramstore.selfing.n_gens_selfing = n;
8959
8960 if (which_map.id != NO_MAP.id) {
8961 paramstore.selfing.map_index = gsc_get_index_of_map(d, which_map);
8962 }
8963 if (paramstore.selfing.map_index == GSC_NA_IDX) {
8964 fprintf(stderr,"Could not find recombination map with identifier %lu\n", (long unsigned int) which_map.id);
8965 return GSC_NO_GROUP;
8966 }
8967
8969
8970 return gsc_scaffold_make_new_genotypes(d, g, (void*) &parentit,
8971 &paramstore,
8974}
8975
8985 union gsc_datastore_make_genotypes* datastore,
8986 gsc_ParentChoice parents[static 2],
8987 gsc_GenoLocation putHere) {
8989 gsc_get_alleles(parents[0].loc),
8990 gsc_get_alleles(putHere),
8991 parents[0].mapindex);
8992}
8993
9014 const gsc_GroupNum group,
9015 const gsc_MapID which_map,
9016 const gsc_GenOptions g) {
9017 /*int group_size = gsc_get_group_size( d, group);
9018 if (group_size < 1) {
9019 fprintf(stderr,"Group %d does not exist.\n", group.num);
9020 return GSC_NO_GROUP;
9021 }*/
9022 if (d->genome.n_maps == 0) {
9023 fprintf(stderr,"Crossing requires at least one recombination map loaded\n");
9024 return GSC_NO_GROUP;
9025 }
9026
9027 union gsc_datastore_make_genotypes paramstore = { 0 };
9028
9029 if (which_map.id != NO_MAP.id) {
9030 paramstore.doub_haps.map_index = gsc_get_index_of_map(d, which_map);
9031 }
9032 if (paramstore.doub_haps.map_index == GSC_NA_IDX) {
9033 fprintf(stderr,"Could not find recombination map with identifier %lu\n", (long unsigned int) which_map.id);
9034 return GSC_NO_GROUP;
9035 }
9036
9038
9039 return gsc_scaffold_make_new_genotypes(d, g, (void*) &parentit,
9040 &paramstore,
9043}
9044
9056static int gsc_helper_parentchooser_cloning(void* parentIterator,
9057 union gsc_datastore_make_genotypes* datastore,
9058 GSC_GLOBALX_T* counter,
9059 gsc_ParentChoice parents[static 2]) {
9061
9062 parents[0].loc = gsc_next_forwards(it);
9063 parents[1] = parents[0];
9064
9065 if (GSC_IS_VALID_LOCATION(parents[0].loc)) {
9066 if (datastore->clones.inherit_names) {
9067 datastore->clones.parent_name = gsc_get_name(parents[0].loc);
9068 }
9069 return GSC_TRUE;
9070 } else {
9071 return GSC_FALSE;
9072 }
9073}
9074
9084 union gsc_datastore_make_genotypes* datastore,
9085 gsc_ParentChoice parents[static 2],
9086 gsc_GenoLocation putHere) {
9087 if (datastore->clones.inherit_names && datastore->clones.parent_name != NULL) {
9088 char* tmpname = gsc_malloc_wrap(sizeof(char)*(strlen(datastore->clones.parent_name) + 1),GSC_TRUE);
9089 strcpy(tmpname, datastore->clones.parent_name);
9090 gsc_set_name(putHere,tmpname);
9091 }
9092
9093 gsc_generate_clone(d, gsc_get_alleles(parents[0].loc), gsc_get_alleles(putHere));
9094}
9095
9122 const gsc_GroupNum group,
9123 const _Bool inherit_names,
9124 gsc_GenOptions g) {
9125 /*int group_size = gsc_get_group_size( d, group);
9126 if (group_size < 1) {
9127 fprintf(stderr,"Group %d does not exist.\n", group.num);
9128 return GSC_NO_GROUP;
9129 }*/
9130
9131 union gsc_datastore_make_genotypes paramstore;
9132 paramstore.clones.inherit_names = inherit_names;
9133
9135
9136 return gsc_scaffold_make_new_genotypes(d, g, (void*) &parentit,
9137 &paramstore,
9140}
9141
9142
9160 const gsc_GroupNum from_group,
9161 const gsc_MapID mapID,
9162 const gsc_GenOptions g) {
9163 GSC_GLOBALX_T group_size = gsc_get_group_size( d, from_group );
9164 if (group_size < 2) {
9165 if (group_size == 1) {
9166 fprintf(stderr,"Group %lu does not have enough members to perform crosses\n", (long unsigned int) from_group.num);
9167 } else {
9168 fprintf(stderr,"Group %lu does not exist\n", (long unsigned int) from_group.num);
9169 }
9170 return GSC_NO_GROUP;
9171 }
9173 gsc_get_group_indexes( d, from_group, group_size, group_indexes );
9174
9175 // number of crosses = number of entries in upper triangle of matrix
9176 // = half of (n entries in matrix - length of diagonal)
9177 // = half of (lmatrix * lmatrix - lmatrix);
9178 GSC_GLOBALX_T n_crosses = group_size * (group_size - 1) / 2; //* g.family_size;
9179
9182 GSC_GLOBALX_T* combinations[2] = {combos0, combos1};
9183 GSC_GLOBALX_T cross_index = 0;
9184 for (GSC_GLOBALX_T i = 0; i < group_size; ++i) {
9185 for (GSC_GLOBALX_T j = i + 1; j < group_size; ++j) {
9186 combinations[0][cross_index] = group_indexes[i];
9187 combinations[1][cross_index] = group_indexes[j];
9188
9189 ++cross_index;
9190 }
9191 }
9192
9193 GSC_DELETE_BUFFER(group_indexes);
9194 gsc_GroupNum out = gsc_make_targeted_crosses(d, n_crosses, combinations[0], combinations[1], mapID, mapID, g);
9195 GSC_DELETE_BUFFER(combos0);
9196 GSC_DELETE_BUFFER(combos1);
9197 return out;
9198}
9199
9201 const gsc_MapID mapID, const gsc_EffectID effID, const gsc_GenOptions g) {
9202 fprintf(stderr, "Function gsc_make_n_crosses_from_top_m_percent is deprecated."
9203 "It behaved unintuitively and goes against genomicSimulation principles on division of functionality\n");
9204
9205 return NO_GROUP;
9206}
9207
9235 const char* input_file,
9236 const gsc_MapID map1,
9237 const gsc_MapID map2,
9238 const gsc_GenOptions g) {
9239 struct gsc_TableSize t = gsc_get_file_dimensions(input_file, '\t');
9240 if (t.num_rows < 1) {
9241 fprintf(stderr, "No crosses exist in that file\n");
9242 return GSC_NO_GROUP;
9243 }
9244
9245 //open file
9246 FILE* fp;
9247 if ((fp = fopen(input_file, "r")) == NULL) {
9248 fprintf(stderr, "Failed to open file %s.\n", input_file); exit(1);
9249 }
9250
9253 GSC_GLOBALX_T* combinations[2] = {combos0,combos1};
9254 char buffer[2][NAME_LENGTH];
9255 // for each row in file
9256 GSC_GLOBALX_T bufferi = 0;
9257 for (int filei = 0; filei < t.num_rows; ++filei) {
9258 // load the names of the two parents
9259 fscanf(fp, "%s %s \n", buffer[0], buffer[1]);
9260 combinations[0][bufferi] = gsc_get_index_of_name(d->m, buffer[0]);
9261 combinations[1][bufferi] = gsc_get_index_of_name(d->m, buffer[1]);
9262 if (combinations[0][bufferi] < 0 || combinations[1][bufferi] < 0) {
9263 fprintf(stderr, "Parents on file %s line %lu could not be found\n", input_file, (long unsigned int) filei);
9264 } else {
9265 ++bufferi;
9266 }
9267 }
9268
9269 fclose(fp);
9270 gsc_GroupNum out = gsc_make_targeted_crosses(d, bufferi, combinations[0], combinations[1], map1, map2, g);
9271 GSC_DELETE_BUFFER(combos0);
9272 GSC_DELETE_BUFFER(combos1);
9273 return out;
9274}
9275
9309 const char* input_file,
9310 const gsc_MapID map1,
9311 const gsc_MapID map2,
9312 const gsc_GenOptions g) {
9313 struct gsc_TableSize t = gsc_get_file_dimensions(input_file, '\t');
9314 if (t.num_rows < 1) {
9315 fprintf(stderr, "No crosses exist in that file\n");
9316 return GSC_NO_GROUP;
9317 }
9318
9319 //open file
9320 FILE* fp;
9321 if ((fp = fopen(input_file, "r")) == NULL) {
9322 fprintf(stderr, "Failed to open file %s.\n", input_file); exit(1);
9323 }
9324
9327 GSC_GLOBALX_T* combinations[2] = {combos0,combos1};
9328 char buffer[4][NAME_LENGTH];
9329 const char* to_buffer[] = {buffer[0], buffer[1], buffer[2], buffer[3]};
9330 gsc_PedigreeID g0_id[4];
9331 GSC_GLOBALX_T f1_i[2];
9332 // for each row in file
9333 for (GSC_GLOBALX_T i = 0; i < t.num_rows; ++i) {
9334 // load the four grandparents
9335 fscanf(fp, "%s %s %s %s \n", buffer[0], buffer[1], buffer[2], buffer[3]);
9336 gsc_get_ids_of_names(d->m, 4, to_buffer, g0_id);
9337 if (g0_id[0].id == GSC_NO_PEDIGREE.id || g0_id[1].id == GSC_NO_PEDIGREE.id || g0_id[2].id == GSC_NO_PEDIGREE.id || g0_id[3].id == GSC_NO_PEDIGREE.id) {
9338 fprintf(stderr, "Could not go ahead with the line %lu cross - g0 names not in records\n",
9339 (long unsigned int) i);
9340 combinations[0][i] = GSC_NA_GLOBALX;
9341 combinations[1][i] = GSC_NA_GLOBALX;
9342 continue;
9343 }
9344
9345 // identify two parents
9346 f1_i[0] = gsc_get_index_of_child(d->m, g0_id[0], g0_id[1]);
9347 f1_i[1] = gsc_get_index_of_child(d->m, g0_id[2], g0_id[3]);
9348 if (f1_i[0] < 0 || f1_i[1] < 0) {
9349 // try different permutations of the four grandparents.
9350 f1_i[0] = gsc_get_index_of_child(d->m, g0_id[0], g0_id[2]);
9351 f1_i[1] = gsc_get_index_of_child(d->m, g0_id[1], g0_id[3]);
9352 if (f1_i[0] < 0 || f1_i[1] < 0) {
9353 f1_i[0] = gsc_get_index_of_child(d->m, g0_id[0], g0_id[3]);
9354 f1_i[1] = gsc_get_index_of_child(d->m, g0_id[1], g0_id[2]);
9355 if (f1_i[0] < 0 || f1_i[1] < 0) {
9356 fprintf(stderr, "Could not go ahead with the line %lu cross - f1 children do not exist for this quartet\n",
9357 (long unsigned int) i);
9358 combinations[0][i] = GSC_NA_GLOBALX;
9359 combinations[1][i] = GSC_NA_GLOBALX;
9360 continue;
9361 }
9362 }
9363 }
9364
9365 //add them to a combinations list
9366 combinations[0][i] = f1_i[0];
9367 combinations[1][i] = f1_i[1];
9368
9369 }
9370
9371 fclose(fp);
9372 gsc_GroupNum out = gsc_make_targeted_crosses(d, t.num_rows, combinations[0], combinations[1],
9373 map1, map2, g);
9374 GSC_DELETE_BUFFER(combos0);
9375 GSC_DELETE_BUFFER(combos1);
9376 return out;
9377}
9378
9379
9380/*--------------------------------Fitness------------------------------------*/
9381
9397 const gsc_GroupNum group,
9398 const gsc_EffectID effID,
9399 const GSC_GLOBALX_T top_n,
9400 const _Bool lowIsBest) {
9401 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
9402 if (effIndex == GSC_NA_IDX) {
9403 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effID.id);
9404 return GSC_NO_GROUP;
9405 }
9406
9407 GSC_GLOBALX_T group_size = gsc_get_group_size( d, group );
9408 if (group_size == 0) {
9409 fprintf(stderr,"Group %lu does not exist\n", (long unsigned int) group.num);
9410 return GSC_NO_GROUP;
9411 }
9412 GSC_CREATE_BUFFER(group_indexes,GSC_GLOBALX_T,group_size);
9413 gsc_get_group_indexes( d, group, group_size, group_indexes );
9414
9415 if (group_size <= top_n) {
9416 // well we'll just have to move em all
9417 gsc_GroupNum migration = gsc_make_group_from(d, group_size, group_indexes);
9418 return migration;
9419 }
9420
9421 // This should be ordered the same as the indexes
9422 gsc_DecimalMatrix fits = gsc_calculate_bvs( d, group, effID ); // 1 by group_size matrix
9423
9424 // get an array of pointers to those fitnesses
9425 GSC_CREATE_BUFFER(p_fits,double*,fits.dim2);
9426 for (size_t i = 0; i < fits.dim2; i++) {
9427 p_fits[i] = &(fits.matrix[0][i]);
9428 }
9429
9430 // sort best-first: ascending comparer if lowIsBest, descending comparer otherwise
9431 if (lowIsBest) {
9432 qsort(p_fits, fits.dim2, sizeof(double*), gsc_helper_ascending_pdouble_comparer);
9433 } else {
9434 qsort(p_fits, fits.dim2, sizeof(double*), gsc_helper_descending_pdouble_comparer);
9435 }
9436
9437 // save the indexes of the best n
9438 GSC_CREATE_BUFFER(top_individuals,GSC_GLOBALX_T,top_n);
9439 for (GSC_GLOBALX_T i = 0; i < top_n; i++) {
9440 top_individuals[i] = group_indexes[p_fits[i] - fits.matrix[0]];
9441 }
9442 gsc_delete_dmatrix(&fits);
9443 GSC_DELETE_BUFFER(p_fits);
9444 GSC_DELETE_BUFFER(group_indexes);
9445
9446 // send those n to a new group
9447 gsc_GroupNum out = gsc_make_group_from(d, top_n, top_individuals);
9448 GSC_DELETE_BUFFER(top_individuals);
9449 return out;
9450}
9451
9470 const gsc_GroupNum group,
9471 const gsc_EffectID effID) {
9472 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
9473 if (effIndex == GSC_NA_IDX) {
9474 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effID.id);
9475 return gsc_generate_zero_dmatrix(0, 0);
9476 }
9477
9478 // casting away const but I promise not to use the iterator to change anything
9480
9481 gsc_DecimalMatrix bvs = gsc_calculate_utility_bvs(&it, d->e + effIndex);
9482
9484 return bvs;
9485}
9486
9500 const gsc_MarkerEffects* effset) {
9501 if (targets == NULL || effset == NULL) {
9502 fprintf(stderr, "Either targets or marker effects were not provided\n");
9503 return gsc_generate_zero_dmatrix(0, 0);
9504 }
9505 gsc_MarkerEffects e = *effset; // trivial line. just for easier typing
9506
9507 GSC_CREATE_BUFFER(sum, double, 50);
9508 GSC_GLOBALX_T n_genotypes = 0;
9509
9511 while (IS_VALID_LOCATION(loc)) {
9512 if (n_genotypes >= sumcap) {
9513 GSC_STRETCH_BUFFER(sum, 2*n_genotypes);
9514 }
9515
9516 sum[n_genotypes] = 0;
9517
9518 //RPACKINSERT R_CheckUserInterrupt();
9519 char* genotype = gsc_get_alleles(loc);
9520 for (GSC_GENOLEN_T m = 0; m < e.n_markers; ++m) {
9521 double msum = 0.;
9522
9523 for (GSC_GENOLEN_T eix = ((m > 0) ? e.cumn_alleles[m-1] : 0); eix < e.cumn_alleles[m]; ++eix) {
9524 double asum = ( (e.allele[eix] == genotype[2*m]) +
9525 (e.allele[eix] == genotype[2*m+1]) ) * e.eff[eix];
9526 msum += asum; // accumulate action
9527 }
9528
9529 sum[n_genotypes] += msum; // accumulate action
9530 }
9531
9532 ++n_genotypes;
9533 loc = gsc_next_forwards(targets);
9534 }
9535
9536 if (e.centre != NULL) {
9537 double summedcentres = 0.;
9538 for (GSC_GENOLEN_T m = 0; m < e.n_markers; ++m) {
9539 summedcentres += e.centre[m];
9540 }
9541
9542 for (GSC_GLOBALX_T i = 0; i < n_genotypes; ++i) {
9543 sum[i] -= summedcentres;
9544 }
9545 }
9546
9548 GSC_FINALISE_BUFFER(sum,out.matrix[0],n_genotypes);
9549 out.dim2 = n_genotypes;
9550 return out;
9551}
9552
9570 const gsc_GroupNum group,
9571 const char allele) {
9572 GSC_CREATE_BUFFER(counts, double*, 50);
9573 GSC_GLOBALX_T n_genotypes = 0;
9574 // casting away const but I promise not to use the iterator to change anything
9576
9578 while (IS_VALID_LOCATION(loc)) {
9579 if (n_genotypes >= countscap) {
9580 GSC_STRETCH_BUFFER(counts, 2*n_genotypes);
9581 }
9582
9583 //RPACKINSERT R_CheckUserInterrupt();
9584 counts[n_genotypes] = gsc_malloc_wrap(sizeof(*counts[n_genotypes])*d->genome.n_markers, GSC_TRUE);
9585 char* genotype = gsc_get_alleles(loc);
9586 for (GSC_GENOLEN_T m = 0; m < d->genome.n_markers; ++m) { // loop parallelisable
9587 counts[n_genotypes][m] = (genotype[2*m] == allele) + (genotype[2*m+1] == allele);
9588 }
9589
9590 ++n_genotypes;
9591 loc = gsc_next_forwards(&it);
9592 }
9594
9596 GSC_FINALISE_BUFFER(counts,out.matrix,n_genotypes);
9597 out.dim1 = n_genotypes;
9598 out.dim2 = d->genome.n_markers;
9599 return out;
9600}
9601
9602
9631 const gsc_MapID mapid,
9632 const GSC_ID_T n) {
9633 gsc_MarkerBlocks blocks;
9634 blocks.num_blocks = 0;
9635
9636 if (d->genome.n_maps < 1) {
9637 fprintf(stderr,"Creating blocks by chromosome length requires at least one recombination map loaded\n");
9638 return blocks;
9639 }
9640 GSC_ID_T mapix = 0;
9641 if (mapid.id != NO_MAP.id) { mapix = gsc_get_index_of_map(d, mapid); }
9642 if (mapix >= d->genome.n_maps) {
9643 fprintf(stderr,"We don't have that recombination maps loaded. Using default map\n");
9644 mapix = 0;
9645 }
9646 gsc_RecombinationMap map = d->genome.maps[mapix];
9647
9648 if (n < 1) {
9649 fprintf(stderr,"Invalid n value: number of blocks must be positive\n");
9650 return blocks;
9651 }
9652 if (map.n_chr < 1) {
9653 fprintf(stderr,"Map has no chromosomes, so it cannot be divided into blocks\n");
9654 }
9655
9656 blocks.num_blocks = n * map.n_chr;
9658 blocks.markers_in_block = gsc_malloc_wrap(sizeof(*blocks.markers_in_block) * blocks.num_blocks,GSC_TRUE);
9659 for (GSC_ID_T i = 0; i < blocks.num_blocks; ++i) {
9660 blocks.num_markers_in_block[i] = 0;
9661 blocks.markers_in_block[i] = NULL;
9662 }
9663
9664 GSC_CREATE_BUFFER(temp_markers_in_block, GSC_GENOLEN_T, 128);
9665
9666 for (GSC_GENOLEN_T chr = 0; chr < map.n_chr; ++chr) {
9667 GSC_GENOLEN_T firstblockix = chr*n;
9668 GSC_GENOLEN_T blockix = firstblockix;
9669
9670 switch (map.chrs[chr].type) {
9671 case GSC_LINKAGEGROUP_SIMPLE:
9672 if (map.chrs[chr].map.simple.n_markers == 1) {
9673 blocks.num_markers_in_block[firstblockix] = 1;
9674 blocks.markers_in_block[firstblockix] = gsc_malloc_wrap(sizeof(**blocks.markers_in_block), GSC_TRUE);
9675 blocks.markers_in_block[firstblockix][0] = map.chrs[chr].map.simple.first_marker_index;
9676 } else if (map.chrs[chr].map.simple.n_markers > 1) {
9677 // For floating point reasons we manually allocate first marker to first block
9678 blocks.num_markers_in_block[blockix] = 1;
9679 temp_markers_in_block[0] = map.chrs[chr].map.simple.first_marker_index;
9680
9681 for (GSC_GENOLEN_T m = 1; m < map.chrs[chr].map.simple.n_markers; ++m) {
9682 //RPACKINSERT R_CheckUserInterrupt();
9683
9684 while (blockix - firstblockix < n-1 &&
9685 map.chrs[chr].map.simple.dists[m] > (blockix - firstblockix + 1) / (float)n) {
9686 // Save this block and move on to the next one.
9687 if (blocks.num_markers_in_block[blockix] > 0) {
9688 size_t bcapacity = sizeof(**blocks.markers_in_block) * blocks.num_markers_in_block[blockix];
9689 blocks.markers_in_block[blockix] = gsc_malloc_wrap(bcapacity, GSC_TRUE);
9690 memcpy(blocks.markers_in_block[blockix],temp_markers_in_block,bcapacity);
9691 }
9692 ++blockix;
9693 }
9694
9695 // Add this marker to the block.
9696 int currentn = blocks.num_markers_in_block[blockix];
9697 if (currentn >= temp_markers_in_blockcap) {
9698 GSC_STRETCH_BUFFER(temp_markers_in_block,2*currentn);
9699 }
9700 temp_markers_in_block[currentn] = map.chrs[chr].map.simple.first_marker_index + m;
9701 ++(blocks.num_markers_in_block[blockix]);
9702 }
9703
9704 // Save last filled block
9705 if (blocks.num_markers_in_block[blockix] > 0) {
9706 size_t bcapacity = sizeof(**blocks.markers_in_block) * blocks.num_markers_in_block[blockix];
9707 blocks.markers_in_block[blockix] = gsc_malloc_wrap(bcapacity, GSC_TRUE);
9708 memcpy(blocks.markers_in_block[blockix],temp_markers_in_block,bcapacity);
9709 }
9710 }
9711 break;
9712
9713 case GSC_LINKAGEGROUP_REORDER:
9714 if (map.chrs[chr].map.reorder.n_markers == 1) {
9715 blocks.num_markers_in_block[firstblockix] = 1;
9716 blocks.markers_in_block[firstblockix] = gsc_malloc_wrap(sizeof(**blocks.markers_in_block), GSC_TRUE);
9717 blocks.markers_in_block[firstblockix][0] = map.chrs[chr].map.reorder.marker_indexes[0];
9718 } else if (map.chrs[chr].map.reorder.n_markers > 1) {
9719 // For floating point reasons we manually allocate first marker to first block
9720 blocks.num_markers_in_block[blockix] = 1;
9721 temp_markers_in_block[0] = map.chrs[chr].map.reorder.marker_indexes[0];
9722
9723 for (GSC_GENOLEN_T m = 1; m < map.chrs[chr].map.reorder.n_markers; ++m) {
9724 //RPACKINSERT R_CheckUserInterrupt();
9725
9726 while (blockix - firstblockix < n-1 &&
9727 map.chrs[chr].map.reorder.dists[m] > (blockix - firstblockix + 1) / (float)n) {
9728 // Save this block and move on to the next one.
9729 if (blocks.num_markers_in_block[blockix] > 0) {
9730 size_t bcapacity = sizeof(**blocks.markers_in_block) * blocks.num_markers_in_block[blockix];
9731 blocks.markers_in_block[blockix] = gsc_malloc_wrap(bcapacity, GSC_TRUE);
9732 memcpy(blocks.markers_in_block[blockix],temp_markers_in_block,bcapacity);
9733 }
9734 ++blockix;
9735 }
9736
9737 // Add this marker to the block.
9738 int currentn = blocks.num_markers_in_block[blockix];
9739 if (currentn >= temp_markers_in_blockcap) {
9740 GSC_STRETCH_BUFFER(temp_markers_in_block,2*currentn);
9741 }
9742 temp_markers_in_block[currentn] = map.chrs[chr].map.reorder.marker_indexes[m];
9743 ++(blocks.num_markers_in_block[blockix]);
9744 }
9745
9746 // Save last filled block
9747 if (blocks.num_markers_in_block[blockix] > 0) {
9748 size_t bcapacity = sizeof(**blocks.markers_in_block) * blocks.num_markers_in_block[blockix];
9749 blocks.markers_in_block[blockix] = gsc_malloc_wrap(bcapacity, GSC_TRUE);
9750 memcpy(blocks.markers_in_block[blockix],temp_markers_in_block,bcapacity);
9751 }
9752 }
9753 break;
9754 }
9755 }
9756
9757 GSC_DELETE_BUFFER(temp_markers_in_block);
9758
9759 return blocks;
9760}
9761
9783gsc_MarkerBlocks gsc_load_blocks(const gsc_SimData* d, const char* block_file) {
9784 struct gsc_TableSize ts = gsc_get_file_dimensions(block_file, '\t');
9785
9786 gsc_MarkerBlocks blocks;
9787 blocks.num_blocks = ts.num_rows - 1;
9790
9791 FILE* infile;
9792 if ((infile = fopen(block_file, "r")) == NULL) {
9793 fprintf(stderr, "Failed to open file %s.\n", block_file); exit(1);
9794 //return blocks;
9795 }
9796
9797 GSC_GENOLEN_T bufferlen = d->genome.n_markers;
9798 GSC_CREATE_BUFFER(markername,char,CONTIG_WIDTH);
9799 GSC_CREATE_BUFFER(markerbuffer,GSC_GENOLEN_T,bufferlen);
9800 GSC_ID_T bi = 0; // block number
9801
9802 // Ignore the first line
9803 fscanf(infile, "%*[^\n]\n");
9804
9805 // Loop through rows of the file (each row corresponds to a block)
9806 while (fscanf(infile, "%*d %*f %*s %*s ") != EOF) {
9807 //for (int bi = 0; bi < n_blocks; ++bi) {
9808
9809 // Indexes in play:
9810 // bi: index in the blocks struct's arrays of the current block/line in the file
9811 // ni: number of characters so far in the name of the next marker being read from the file
9812 // mi: number of markers that have so far been read from the file for this block
9813 blocks.num_markers_in_block[bi] = 0;
9814 int c;
9815 size_t ni = 0;
9816 GSC_GENOLEN_T mi = 0;
9817
9818 memset(markerbuffer, 0, sizeof(*markerbuffer) * bufferlen);
9819 while ((c = fgetc(infile)) != EOF && c !='\n') {
9820 if (c == ';') {
9821 markername[ni] = '\0';
9822
9823 // identify the index of this marker and save it in the temporary marker buffer `markerbuffer`
9824 GSC_GENOLEN_T markerindex;
9825 if (gsc_get_index_of_genetic_marker(markername, d->genome, &markerindex)) {
9826 ++(blocks.num_markers_in_block[bi]);
9827 markerbuffer[mi] = markerindex;
9828 ++mi;
9829 }
9830
9831 ni = 0;
9832 } else {
9833 markername[ni] = c;
9834 ++ni;
9835 }
9836 }
9837
9838 // copy the markers belonging to this block into the struct
9839 blocks.markers_in_block[bi] = gsc_malloc_wrap(sizeof(GSC_GENOLEN_T) * mi,GSC_TRUE);
9840 for (GSC_GENOLEN_T i = 0; i < mi; ++i) {
9841 blocks.markers_in_block[bi][i] = markerbuffer[i];
9842 }
9843
9844 ++bi;
9845 }
9846
9847 GSC_DELETE_BUFFER(markerbuffer);
9848 GSC_DELETE_BUFFER(markername);
9849 fclose(infile);
9850 return blocks;
9851}
9852
9877 const gsc_GroupNum group,
9878 const gsc_MarkerBlocks b,
9879 const gsc_EffectID effID) {
9880 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
9881 if (effIndex == GSC_NA_IDX) {
9882 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effID.id);
9883 return gsc_generate_zero_dmatrix(0, 0);
9884 }
9885 gsc_MarkerEffects e = d->e[effIndex];
9886
9887 // casting away const but I promise not to use the iterator to change anything
9889
9891
9893 return bvs;
9894}
9895
9915 if (b.num_blocks == 0) {
9916 GSC_GENOLEN_T ntargets = 0;
9918 while (IS_VALID_LOCATION(loc)) {
9919 ++ntargets; loc = gsc_next_forwards(targets);
9920 }
9921 return gsc_generate_zero_dmatrix(2*ntargets, 0);
9922 }
9923
9924 GSC_CREATE_BUFFER(bvs, double*, 50);
9925 GSC_GLOBALX_T n_genotypes = 0;
9926
9928 if (e.centre == NULL) {
9929 // for each group member
9930 while (IS_VALID_LOCATION(loc)) {
9931 GSC_GLOBALX_T hap1 = 2*n_genotypes;
9932 GSC_GLOBALX_T hap2 = hap1 + 1;
9933
9934 if (hap1 >= bvscap) {
9935 GSC_STRETCH_BUFFER(bvs, 2*hap1);
9936 }
9937
9938 //RPACKINSERT R_CheckUserInterrupt();
9939 bvs[hap1] = gsc_malloc_wrap(sizeof(*bvs[hap1])*b.num_blocks, GSC_TRUE);
9940 bvs[hap2] = gsc_malloc_wrap(sizeof(*bvs[hap2])*b.num_blocks, GSC_TRUE);
9941 char* genotype = gsc_get_alleles(loc);
9942
9943 // for each block
9944 for (GSC_ID_T j = 0; j < b.num_blocks; ++j) {
9945 //RPACKINSERT R_CheckUserInterrupt();
9946 bvs[hap1][j] = 0.;
9947 bvs[hap2][j] = 0.;
9948
9949 // calculate the local BV
9950 for (GSC_GENOLEN_T k = 0; k < b.num_markers_in_block[j]; ++k) {
9951 GSC_GENOLEN_T markerix = b.markers_in_block[j][k];
9952 _Bool gotallele1 = 0;
9953 _Bool gotallele2 = 0;
9954
9955 for (GSC_GENOLEN_T eix = ((markerix > 0) ? e.cumn_alleles[markerix-1] : 0);
9956 eix < e.cumn_alleles[markerix]; ++eix) {
9957 if (!gotallele1 && e.allele[eix] == genotype[2*markerix]) {
9958 bvs[hap1][j] += e.eff[eix];
9959 gotallele1 = 1;
9960 }
9961 if (!gotallele2 && e.allele[eix] == genotype[2*markerix + 1]) {
9962 bvs[hap2][j] += e.eff[eix];
9963 gotallele2 = 1;
9964 }
9965 }
9966 }
9967 }
9968
9969 ++n_genotypes;
9970 loc = gsc_next_forwards(targets);
9971 }
9972 } else { // adding centering in a separate branch for theoretical performance re branching
9973 while (IS_VALID_LOCATION(loc)) {
9974 GSC_GLOBALX_T hap1 = 2*n_genotypes;
9975 GSC_GLOBALX_T hap2 = hap1 + 1;
9976
9977 if (hap1 >= bvscap) {
9978 GSC_STRETCH_BUFFER(bvs, 2*hap1);
9979 }
9980
9981 bvs[hap1] = gsc_malloc_wrap(sizeof(*bvs[hap1])*b.num_blocks, GSC_TRUE);
9982 bvs[hap2] = gsc_malloc_wrap(sizeof(*bvs[hap2])*b.num_blocks, GSC_TRUE);
9983 char* genotype = gsc_get_alleles(loc);
9984
9985 // for each block
9986 for (GSC_ID_T j = 0; j < b.num_blocks; ++j) {
9987 //RPACKINSERT R_CheckUserInterrupt();
9988 bvs[hap1][j] = 0.;
9989 bvs[hap2][j] = 0.;
9990
9991 // calculate the local BV
9992 for (GSC_GENOLEN_T k = 0; k < b.num_markers_in_block[j]; ++k) {
9993 GSC_GENOLEN_T markerix = b.markers_in_block[j][k];
9994 _Bool gotallele1 = 0;
9995 _Bool gotallele2 = 0;
9996
9997 for (GSC_GENOLEN_T eix = ((markerix > 0) ? e.cumn_alleles[markerix-1] : 0);
9998 eix < e.cumn_alleles[markerix]; ++eix) {
9999 if (!gotallele1 && e.allele[eix] == genotype[2*markerix]) {
10000 bvs[hap1][j] += e.eff[eix];
10001 gotallele1 = 1;
10002 }
10003 if (!gotallele2 && e.allele[eix] == genotype[2*markerix + 1]) {
10004 bvs[hap2][j] += e.eff[eix];
10005 gotallele2 = 1;
10006 }
10007 }
10008 bvs[hap1][j] -= e.centre[markerix];
10009 bvs[hap2][j] -= e.centre[markerix];
10010 }
10011 }
10012
10013 ++n_genotypes;
10014 loc = gsc_next_forwards(targets);
10015 }
10016 }
10017
10019 GSC_FINALISE_BUFFER(bvs,out.matrix,2*n_genotypes);
10020 out.dim1 = 2*n_genotypes;
10021 out.dim2 = b.num_blocks;
10022 return out;
10023}
10024
10046 const gsc_EffectID effID,
10047 const char symbol_na,
10048 char* opt_haplotype) {
10049 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
10050 if (effIndex == GSC_NA_IDX) {
10051 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effID.id);
10052 memset(opt_haplotype, 0, sizeof(char)*(d->genome.n_markers + 1));
10053 return;
10054 }
10055 gsc_MarkerEffects e = d->e[effIndex];
10056
10057 for (GSC_GENOLEN_T m_ix = 0; m_ix < d->genome.n_markers; ++m_ix) {
10058 char best_allele;
10059 double best_score;
10060 GSC_GENOLEN_T e_ix = (m_ix > 0) ? e.cumn_alleles[m_ix-1] : 0;
10061 if (e_ix >= e.cumn_alleles[m_ix]) {
10062 // we have no marker effects for alleles at this marker
10063 best_allele = symbol_na;
10064 } else { // do have marker effects. initialise the max.
10065 best_allele = e.allele[e_ix];
10066 best_score = e.eff[e_ix];
10067 ++ e_ix;
10068 }
10069 for (; e_ix < e.cumn_alleles[m_ix]; ++e_ix) {
10070 if (e.eff[e_ix] > best_score) {
10071 best_score = e.eff[e_ix];
10072 best_allele = e.allele[e_ix];
10073 }
10074 }
10075
10076 opt_haplotype[m_ix] = best_allele;
10077 }
10078 opt_haplotype[d->genome.n_markers] = '\0';
10079}
10080
10081
10105 const gsc_GroupNum group,
10106 const gsc_EffectID effID,
10107 const char symbol_na,
10108 char* opt_haplotype) {
10109 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
10110 if (effIndex == GSC_NA_IDX) {
10111 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effID.id);
10112 memset(opt_haplotype, 0, sizeof(char)*(d->genome.n_markers + 1));
10113 return;
10114 }
10115 gsc_MarkerEffects e = d->e[effIndex];
10116
10117 GSC_CREATE_BUFFER(checked,_Bool,e.cumn_alleles[e.n_markers-1]);
10118 memset(checked, 0, sizeof(_Bool)*e.cumn_alleles[e.n_markers-1]);
10119
10120 GSC_CREATE_BUFFER(best_score,double,d->genome.n_markers);
10121 for (GSC_GENOLEN_T m = 0; m < d->genome.n_markers; ++m) {
10122 best_score[m] = NAN;
10123 opt_haplotype[m] = symbol_na;
10124 }
10125
10128 while (IS_VALID_LOCATION(loc)) {
10129 char* genotype = gsc_get_alleles(loc);
10130 // Loop through markers
10131 for (GSC_GENOLEN_T m = 0; m < d->genome.n_markers; ++m) {
10132 // Loop through alleles at that marker
10133 for (GSC_GENOLEN_T e_ix = ((m > 0) ? e.cumn_alleles[m-1] : 0);
10134 e_ix < e.cumn_alleles[m]; ++e_ix) {
10135 if (!checked[e_ix] && (genotype[2*m] == e.allele[e_ix] ||
10136 genotype[2*m+1] == e.allele[e_ix])) {
10137 // Let's check if this allele is better
10138 double score = 2 * e.eff[e_ix];
10139 if (isnan(best_score[m]) || score > best_score[m]) {
10140 best_score[m] = score;
10141 opt_haplotype[m] = e.allele[e_ix];
10142 }
10143 checked[e_ix] = 1;
10144 }
10145 }
10146 }
10147 loc = gsc_next_forwards(&it);
10148 }
10149
10151 GSC_DELETE_BUFFER(checked);
10152 GSC_DELETE_BUFFER(best_score);
10153
10154 opt_haplotype[d->genome.n_markers] = '\0';
10155}
10156
10157
10173 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
10174 if (effIndex == GSC_NA_IDX) {
10175 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effID.id);
10176 return 0;
10177 }
10178 gsc_MarkerEffects e = d->e[effIndex];
10179
10180 double best_gebv = 0.;
10181 for (GSC_GENOLEN_T m_ix = 0; m_ix < e.n_markers; ++m_ix) {
10182 double best_score = 0;
10183 GSC_GENOLEN_T e_ix = (m_ix > 0) ? e.cumn_alleles[m_ix-1] : 0;
10184 if (e_ix < e.cumn_alleles[m_ix]) { // we have marker effects for this marker. initialise
10185 best_score = e.eff[e_ix];
10186 ++e_ix;
10187 }
10188 for (; e_ix < e.cumn_alleles[m_ix]; ++e_ix) {
10189 if (e.eff[e_ix] > best_score) {
10190 best_score = e.eff[e_ix];
10191 }
10192 }
10193
10194 best_gebv += (2*best_score);
10195 }
10196
10197 if (e.centre != NULL) {
10198 double summedcentres = 0.;
10199 for (GSC_GENOLEN_T m = 0; m < e.n_markers; ++m) {
10200 summedcentres += e.centre[m];
10201 }
10202 best_gebv -= summedcentres;
10203 }
10204
10205 return best_gebv;
10206}
10207
10219 const gsc_GroupNum group,
10220 const gsc_EffectID effID) {
10221 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
10222 if (effIndex == GSC_NA_IDX) {
10223 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effID.id);
10224 return 0;
10225 }
10226 gsc_MarkerEffects e = d->e[effIndex];
10227
10228 GSC_CREATE_BUFFER(checked,_Bool,e.cumn_alleles[e.n_markers-1]);
10229 memset(checked, 0, sizeof(_Bool)*e.cumn_alleles[e.n_markers-1]);
10230
10231 GSC_CREATE_BUFFER(best_score,double,d->genome.n_markers);
10232 for (GSC_GENOLEN_T m = 0; m < d->genome.n_markers; ++m) {
10233 best_score[m] = NAN;
10234 }
10235
10238 while (IS_VALID_LOCATION(loc)) {
10239 char* genotype = gsc_get_alleles(loc);
10240 // Loop through markers
10241 for (GSC_GENOLEN_T m = 0; m < e.n_markers; ++m) {
10242 // Loop through alleles at that marker
10243 for (GSC_GENOLEN_T e_ix = ((m > 0) ? e.cumn_alleles[m-1] : 0);
10244 e_ix < e.cumn_alleles[m]; ++e_ix) {
10245 if (!checked[e_ix] && (genotype[2*m] == e.allele[e_ix] ||
10246 genotype[2*m+1] == e.allele[e_ix])) {
10247 // Let's check if this allele is better
10248 double score = 2 * e.eff[e_ix];
10249 if (isnan(best_score[m]) || score > best_score[m]) {
10250 best_score[m] = score;
10251 }
10252 checked[e_ix] = 1;
10253 }
10254 }
10255 }
10256 loc = gsc_next_forwards(&it);
10257 }
10258
10259 // sum up at the end
10260 double optimal_bv = 0;
10261 for (GSC_GENOLEN_T m = 0; m < e.n_markers; ++m) {
10262 if (!isnan(best_score[m])) {
10263 optimal_bv += best_score[m];
10264 }
10265 }
10266
10267 if (e.centre != NULL) {
10268 double summedcentres = 0.;
10269 for (GSC_GENOLEN_T m = 0; m < e.n_markers; ++m) {
10270 summedcentres += e.centre[m];
10271 }
10272 optimal_bv -= summedcentres;
10273 }
10274
10276 GSC_DELETE_BUFFER(checked);
10277 GSC_DELETE_BUFFER(best_score);
10278 return optimal_bv;
10279}
10280
10292 const GSC_ID_T effIndex = gsc_get_index_of_eff_set(d, effID);
10293 if (effIndex == GSC_NA_IDX) {
10294 fprintf(stderr,"Nonexistent effect set with id %lu\n", (long unsigned int) effID.id);
10295 return 0;
10296 }
10297 gsc_MarkerEffects e = d->e[effIndex];
10298
10299 double worst_gebv = 0.;
10300 for (GSC_GENOLEN_T m_ix = 0; m_ix < e.n_markers; ++m_ix) {
10301 double worst_score = 0;
10302 GSC_GENOLEN_T e_ix = (m_ix > 0) ? e.cumn_alleles[m_ix-1] : 0;
10303 if (e_ix < e.cumn_alleles[m_ix]) { // we have marker effects for this marker. initialise
10304 worst_score = e.eff[e_ix];
10305 ++e_ix;
10306 }
10307 for (; e_ix < e.cumn_alleles[m_ix]; ++e_ix) {
10308 if (e.eff[e_ix] < worst_score) {
10309 worst_score = e.eff[e_ix];
10310 }
10311 }
10312
10313 worst_gebv += (2*worst_score);
10314 }
10315
10316 if (e.centre != NULL) {
10317 double summedcentres = 0.;
10318 for (GSC_GENOLEN_T m = 0; m < e.n_markers; ++m) {
10319 summedcentres += e.centre[m];
10320 }
10321 worst_gebv -= summedcentres;
10322 }
10323
10324 return worst_gebv;
10325}
10326
10327/*--------------------------------Saving-----------------------------------*/
10328
10346void gsc_save_markerblocks(const char* fname,
10347 const gsc_SimData* d,
10348 const gsc_MarkerBlocks b,
10349 const gsc_MapID labelMapID) {
10350 FILE* f;
10351 if ((f = fopen(fname, "w")) == NULL) {
10352 fprintf(stderr, "Failed to open file %s for writing output\n", fname); return;
10353 }
10354
10355 GSC_ID_T mapix;
10356 if (labelMapID.id == NO_MAP.id || (mapix = gsc_get_index_of_map(d, labelMapID)) == GSC_NA_IDX) {
10358 } else {
10360 }
10361}
10362
10382void gsc_save_genotypes(const char* fname,
10383 const gsc_SimData* d,
10384 const gsc_GroupNum groupID,
10385 const _Bool markers_as_rows) {
10386 FILE* f;
10387 if ((f = fopen(fname, "w")) == NULL) {
10388 fprintf(stderr, "Failed to open file %s for writing output\n", fname); return;
10389 }
10390
10391 // casing away const but I promise not to use the iterator to change anything
10393
10394 gsc_save_utility_genotypes(f, &it, d->genome.n_markers, d->genome.marker_names, markers_as_rows);
10395
10397 fclose(f);
10398}
10399
10420void gsc_save_allele_counts(const char* fname,
10421 const gsc_SimData* d,
10422 const gsc_GroupNum groupID,
10423 const char allele,
10424 const _Bool markers_as_rows) {
10425 FILE* f;
10426 if ((f = fopen(fname, "w")) == NULL) {
10427 fprintf(stderr, "Failed to open file %s for writing output\n", fname); return;
10428 }
10429
10430 // casing away const but I promise not to use the iterator to change anything
10432
10434 markers_as_rows, allele);
10435
10437 fclose(f);
10438}
10439
10461void gsc_save_pedigrees(const char* fname,
10462 const gsc_SimData* d,
10463 const gsc_GroupNum groupID,
10464 const _Bool full_pedigree) {
10465 FILE* f;
10466 if ((f = fopen(fname, "w")) == NULL) {
10467 fprintf(stderr, "Failed to open file %s for writing output\n", fname); return;
10468 }
10469
10470 // casing away const but I promise not to use the iterator to change anything
10472
10473 gsc_save_utility_pedigrees(f, &it, full_pedigree, d->m);
10474
10476 fclose(f);
10477}
10478
10494void gsc_save_bvs(const char* fname,
10495 const gsc_SimData* d,
10496 const gsc_GroupNum groupID,
10497 const gsc_EffectID effID) {
10498 FILE* f;
10499 if ((f = fopen(fname, "w")) == NULL) {
10500 fprintf(stderr, "Failed to open file %s for writing output\n", fname); return;
10501 }
10502
10503 GSC_ID_T effix = gsc_get_index_of_eff_set(d, effID);
10504 if (effix == GSC_NA_IDX) {
10505 fprintf(stderr, "Marker effect set %lu does not exist: cannot calculate breeding values\n", (long unsigned int) effID.id); return;
10506 }
10507
10508 // casting away const but I promise not to use the iterator to change anything
10510
10511 gsc_save_utility_bvs(f, &it, &d->e[effix]);
10512
10514 fclose(f);
10515}
10516
10540void gsc_save_local_bvs(const char* fname,
10541 const gsc_SimData* d,
10542 const gsc_GroupNum groupID,
10543 const gsc_MarkerBlocks b,
10544 const gsc_EffectID effID,
10545 const _Bool headers) {
10546 FILE* f;
10547 if ((f = fopen(fname, "w")) == NULL) {
10548 fprintf(stderr, "Failed to open file %s for writing output\n", fname); return;
10549 }
10550
10551 gsc_DecimalMatrix dec = gsc_calculate_local_bvs(d, groupID, b, effID);
10552
10553 if (headers) {
10554 // Re: headers:
10555 // 1. rows shall be genotype names with _1 or _2 appended
10556 // 2. blocks don't have names right now. They shall remain nameless.
10557 GSC_CREATE_BUFFER(ghapnames,char*,dec.dim1);
10558 GSC_GLOBALX_T i = 0;
10559 // casting away const but I promise not to use the iterator to change anything
10562 while (IS_VALID_LOCATION(loc)) {
10563 char* name = gsc_get_name(loc);
10564 int len = (name == NULL) ? 0 : strlen(name); // if name is null, the header will just be "_1" and "_2".
10565 ghapnames[i] = gsc_malloc_wrap(sizeof(char)*(len+3), GSC_TRUE);
10566 ghapnames[i+1] = gsc_malloc_wrap(sizeof(char)*(len+3), GSC_TRUE);
10567
10568 strncpy(ghapnames[i], name, sizeof(char)*len);
10569 ghapnames[i][len] = '_'; ghapnames[i][len+1] = '1'; ghapnames[i][len+2] = '\0';
10570 strncpy(ghapnames[i+1], name, sizeof(char)*len);
10571 ghapnames[i+1][len] = '_'; ghapnames[i+1][len+1] = '2'; ghapnames[i+1][len+2] = '\0';
10572
10573 i += 2;
10574 if (i >= dec.dim1) {
10575 break;
10576 }
10577
10578 loc = gsc_next_forwards(&it);
10579 }
10580 for (; i < dec.dim1; ++i) { // zero trailing entries if something went wrong.
10581 ghapnames[i] = NULL;
10582 }
10583 gsc_save_utility_dmatrix(f, &dec, ghapnames, NULL, 0);
10584 for (size_t i = 0; i < dec.dim1; ++i) {
10585 if (ghapnames[i] != NULL) {
10586 GSC_FREE(ghapnames[i]);
10587 }
10588 }
10589 GSC_DELETE_BUFFER(ghapnames);
10590 } else {
10591 gsc_save_utility_dmatrix(f, &dec, NULL, NULL, 0);
10592 }
10593
10594 gsc_delete_dmatrix(&dec);
10595 fclose(f);
10596}
10597
10601 const gsc_LinkageGroup chr,
10602 double* pos) {
10603 GSC_GENOLEN_T offset;
10604 switch (chr.type) {
10605 case GSC_LINKAGEGROUP_SIMPLE:
10606 offset = markerix - chr.map.simple.first_marker_index;
10607 if (offset >= 0 && offset < chr.map.simple.n_markers) {
10608 if (pos != NULL && chr.map.simple.n_markers > 1) {
10609 *pos = chr.map.simple.dists[offset] * chr.map.simple.expected_n_crossovers;
10610 } else {
10611 *pos = 0; // if there is only one marker on chromosome
10612 }
10613 return GSC_TRUE;
10614 } else {
10615 return GSC_FALSE;
10616 }
10617 case GSC_LINKAGEGROUP_REORDER:
10618 for (GSC_GENOLEN_T i = 0; i < chr.map.reorder.n_markers; ++i) {
10619 if (markerix == chr.map.reorder.marker_indexes[i]) {
10620 if (pos != NULL) {
10621 *pos = chr.map.reorder.dists[i] * chr.map.reorder.expected_n_crossovers;
10622 }
10623 return GSC_TRUE;
10624 }
10625 }
10626 return GSC_FALSE;
10627 }
10628 return GSC_NA;
10629}
10630
10689 const gsc_MarkerBlocks b,
10690 const GSC_GENOLEN_T n_markers,
10691 char** const marker_names,
10692 const RecombinationMap* map) {
10693
10694 // Header only gets printed if there are multiple columns.
10695 // (If no map is provided, we print only the third column (markers in each block))
10696 if (map != NULL) {
10697 const char header[] = "Chrom\tLen\tMarkers\n";
10698 fwrite(header, sizeof(char)*strlen(header), 1, f);
10699 }
10700
10701 for (GSC_ID_T i = 0; i < b.num_blocks; ++i) {
10702 if (map != NULL) {
10703 // If we are provided a map, then try to find and print the length of each block
10704 int isonchr = -1;
10705 double len = 0;
10706 if (b.num_markers_in_block[i] > 0) {
10707 double minpos = 0;
10708 double maxpos = 0;
10709 for (GSC_GENOLEN_T chrix = 0; chrix < map->n_chr; ++chrix) {
10711 map->chrs[chrix],&minpos)) {
10712 isonchr = chrix;
10713 maxpos = minpos;
10714 for (GSC_GENOLEN_T j = 1; j < b.num_markers_in_block[i]; ++j) {
10715 double pos;
10717 map->chrs[chrix],&pos)) {
10718 maxpos = (pos > maxpos) ? pos : maxpos;
10719 minpos = (pos < minpos) ? pos : minpos;
10720 } else {
10721 isonchr = -1;
10722 break;
10723 }
10724 }
10725 len = maxpos - minpos;
10726 break;
10727 }
10728 }
10729 }
10730
10731 if (isonchr >= 0) {
10732 fprintf(f,"%lu\t%lf\t",(long unsigned int)isonchr,len*100);
10733 } else {
10734 const char colns[] = "-\t-\t";
10735 fwrite(colns, sizeof(char)*strlen(colns), 1, f);
10736 }
10737 }
10738
10739 // Print the markers contained in the block
10740 for (GSC_GENOLEN_T j = 0; j < b.num_markers_in_block[i]; ++j) {
10741 GSC_GENOLEN_T k = b.markers_in_block[i][j];
10742 if (k <= n_markers) {
10743 fwrite(marker_names[k], sizeof(char)*strlen(marker_names[k]), 1, f);
10744 } else {
10745 fprintf(f,"%lu",(long unsigned int)k);
10746 }
10747 fputc(';',f);
10748 }
10749
10750 fwrite("\n", sizeof(char), 1, f);
10751 }
10752
10753 fflush(f);
10754 return;
10755}
10756
10764 gsc_BidirectionalIterator* targets,
10765 GSC_GENOLEN_T n_markers,
10766 char** const marker_names,
10767 const _Bool markers_as_rows,
10768 void (*bodycell_printer)(FILE*,
10771 void*),
10772 void* bodycell_printer_data) {
10773
10774 // legacy feature: if printing a specific group's members, put the group number in
10775 // the top left corner cell
10776 if (targets != NULL && targets->group.num != NO_GROUP.num) {
10777 fprintf(f,"%lu",(long unsigned int) targets->group.num);
10778 }
10779
10780 GSC_GLOBALX_T ntargets;
10781 if (markers_as_rows) {
10782 ntargets = 0;
10783 // Header row (genotype names)
10784 if (targets != NULL) {
10786 while (IS_VALID_LOCATION(loc)) {
10787 fwrite("\t", sizeof(char), 1, f);
10788 ++ntargets;
10789 char* n = gsc_get_name(loc);
10790 if (n != NULL) {
10791 fwrite(n, sizeof(char)*strlen(n), 1, f);
10792 } else {
10793 fprintf(f, "%lu", (long unsigned int) gsc_get_id(loc).id);
10794 }
10795
10796 loc = gsc_next_forwards(targets);
10797 }
10798 fwrite("\n", sizeof(char), 1, f);
10799 }
10800
10801 // Body (genotypes and genotype names)
10802 // - This is our row counter
10803 GSC_GENOLEN_T row = 0;
10804 gsc_GenoLocation* genos = NULL;
10805 // - This is our genotype position cache, because BidirectionalIterator does not have a built-in cache
10806 if (ntargets > 0 && ((row < n_markers || (ntargets > 0 && row < targets->cachedAM->n_markers)))) {
10807 genos = gsc_malloc_wrap(sizeof(*genos)*ntargets, GSC_FALSE);
10808 if (genos != NULL) {
10809 genos[0] = gsc_set_bidirectional_iter_to_start(targets);
10810 for (GSC_GLOBALX_T i = 1; i < ntargets; ++i) {
10811 genos[i] = gsc_next_forwards(targets);
10812 }
10813 }
10814 }
10815 while (row < n_markers || (ntargets > 0 && row < targets->cachedAM->n_markers)) {
10816 // Row header
10817 if (row < n_markers) {
10818 if (marker_names[row] != NULL) {
10819 fwrite(marker_names[row], sizeof(char)*strlen(marker_names[row]), 1, f);
10820 }
10821 }
10822
10823 // Row body
10824 for (GSC_GLOBALX_T i = 0; i < ntargets; ++i) {
10825 gsc_GenoLocation loc;
10826 if (genos != NULL) {
10827 loc = genos[i];
10828 } else {
10829 loc = (i == 0) ? gsc_set_bidirectional_iter_to_start(targets) :
10830 gsc_next_forwards(targets);
10831 }
10832
10833 fwrite("\t", sizeof(char), 1, f);
10834 bodycell_printer(f,loc,row,bodycell_printer_data);
10835 }
10836
10837 fwrite("\n", sizeof(char), 1, f);
10838 ++row;
10839 }
10840 if (genos != NULL) { GSC_FREE(genos); }
10841
10842 } else { // markers as rows = false
10843 // Header row (marker names)
10844 if (marker_names != NULL) {
10845 for (GSC_GENOLEN_T i = 0; i < n_markers; ++i) {
10846 fwrite("\t", sizeof(char), 1, f);
10847 if (marker_names[i] != NULL) {
10848 fwrite(marker_names[i], sizeof(char)*strlen(marker_names[i]), 1, f);
10849 }
10850 }
10851 fwrite("\n", sizeof(char), 1, f);
10852 }
10853
10854 // Body (genotypes and genotype names)
10855 if (targets != NULL) {
10857 while (IS_VALID_LOCATION(loc)) {
10858 // Row header
10859 char* n = gsc_get_name(loc);
10860 if (n != NULL) {
10861 fwrite(n, sizeof(char)*strlen(n), 1, f);
10862 } else {
10863 fprintf(f, "%lu", (long unsigned int) gsc_get_id(loc).id);
10864 }
10865
10866 // Row body
10867 for (GSC_GENOLEN_T i = 0; i < targets->cachedAM->n_markers; ++i) {
10868 fwrite("\t", sizeof(char), 1, f);
10869 bodycell_printer(f,loc,i,bodycell_printer_data);
10870 }
10871 fwrite("\n", sizeof(char), 1, f);
10872
10873 loc = gsc_next_forwards(targets);
10874 }
10875 }
10876 }
10877
10878 fflush(f);
10879 return;
10880}
10881
10885 gsc_GenoLocation loc,
10886 GSC_GENOLEN_T markerix,
10887 void* NA) {
10888 if (IS_VALID_LOCATION(loc)) {
10889 fwrite(gsc_get_alleles(loc) + 2*markerix, sizeof(char)*2, 1, f);
10890 }
10891}
10892
10896 gsc_GenoLocation loc,
10897 GSC_GENOLEN_T markerix,
10898 void* data) {
10899 if (IS_VALID_LOCATION(loc)) {
10900 char allele = *(char*) data;
10901 int count = 0;
10902 if (get_alleles(loc)[2*markerix] == allele) { ++count; }
10903 if (get_alleles(loc)[2*markerix + 1] == allele) { ++count; }
10904 char out = '0' + count;
10905 fwrite(&out, sizeof(char), 1, f);
10906 }
10907}
10908
10957 gsc_BidirectionalIterator* targets,
10958 GSC_GENOLEN_T n_markers,
10959 char** const marker_names,
10960 const _Bool markers_as_rows) {
10961 gsc_scaffold_save_genotype_info(f, targets, n_markers, marker_names, markers_as_rows,
10963}
10964
11021 GSC_GENOLEN_T n_markers,
11022 char** const marker_names,
11023 const _Bool markers_as_rows,
11024 const char allele) {
11025 gsc_scaffold_save_genotype_info(f, targets, n_markers, marker_names, markers_as_rows,
11026 &gsc_helper_output_countmatrix_cell, (void*)&allele);
11027}
11028
11036 gsc_PedigreeID p1,
11037 gsc_PedigreeID p2,
11038 void (*strprinter)(char*, size_t, void*),
11039 void (*intprinter)(long unsigned int, void*),
11040 void* printer_data) {
11041 gsc_PedigreeID pedigree[2];
11042
11043 // open brackets
11044 strprinter("=(", sizeof(char)*2,printer_data);
11045 char* name;
11046
11047 // enables us to print only the known parent if one is unknown
11048 if (p1.id == GSC_NO_PEDIGREE.id || p2.id == GSC_NO_PEDIGREE.id) {
11049 p1.id = (p1.id >= p2.id) ? p1.id : p2.id; //max of the two
11050 p2.id = p1.id;
11051 }
11052
11053 if (p1.id == p2.id) {
11054 if (p1.id != GSC_NO_PEDIGREE.id) { //print nothing if both are unknown.
11055 // Selfed parent
11056 name = gsc_get_name_of_id( m, p1);
11057 if (name != NULL) {
11058 strprinter(name, sizeof(char)*strlen(name), printer_data);
11059 } else if (p1.id != GSC_NO_PEDIGREE.id) {
11060 intprinter((long unsigned int) p1.id,printer_data);
11061 }
11062
11063 if (gsc_get_parents_of_id(m, p1, pedigree) == 0) {
11064 gsc_scaffold_save_ancestry_of(m, pedigree[0], pedigree[1],strprinter,intprinter,printer_data);
11065 }
11066 }
11067 } else {
11068 // Parent 1
11069 name = gsc_get_name_of_id( m, p1);
11070 if (name != NULL) {
11071 strprinter(name, sizeof(char)*strlen(name),printer_data);
11072 } else if (p1.id != GSC_NO_PEDIGREE.id) {
11073 intprinter((long unsigned int) p1.id,printer_data);
11074 }
11075 if (gsc_get_parents_of_id(m, p1, pedigree) == 0) {
11076 gsc_scaffold_save_ancestry_of(m, pedigree[0], pedigree[1],strprinter,intprinter,printer_data);
11077 }
11078
11079 // separator
11080 strprinter(",", sizeof(char),printer_data);
11081
11082 // Parent 2
11083 name = gsc_get_name_of_id( m, p2);
11084 if (name != NULL) {
11085 strprinter(name, sizeof(char)*strlen(name),printer_data);
11086 } else if (p2.id != GSC_NO_PEDIGREE.id) {
11087 intprinter((long unsigned int) p2.id,printer_data);
11088 }
11089
11090 if (gsc_get_parents_of_id(m, p2, pedigree) == 0) {
11091 gsc_scaffold_save_ancestry_of(m, pedigree[0], pedigree[1],strprinter,intprinter,printer_data);
11092 }
11093
11094 }
11095
11096 // close brackets
11097 strprinter(")", sizeof(char),printer_data);
11098}
11099
/** String sink for the ancestry printer that targets a file.
 *
 * @param str characters to emit (need not be NUL-terminated).
 * @param strlen number of bytes of `str` to emit.
 * @param data the destination stream, passed as an opaque FILE*.
 */
static void gsc_helper_ancestry_strprinter_file(char* str, size_t strlen, void* data) {
    // Written as one item of `strlen` bytes.
    fwrite(str, strlen, 1, (FILE*) data);
}
11106
/** Integer sink for the ancestry printer that targets a file.
 *
 * @param i value to emit, printed in decimal with no padding or separator.
 * @param data the destination stream, passed as an opaque FILE*.
 */
static void gsc_helper_ancestry_intprinter_file(long unsigned int i, void* data) {
    fprintf((FILE*) data, "%lu", i);
}
11113
11175 const _Bool full_pedigree,
11176 const AlleleMatrix* parent_pedigree_store) {
11177
11178 if (targets == NULL) { return; }
11179
11180 gsc_GenoLocation loc;
11181 switch (full_pedigree) {
11182 case 0:
11184 while (IS_VALID_LOCATION(loc)) {
11185 // Offspring
11186 char* n = gsc_get_name(loc);
11187 if (n != NULL) {
11188 fwrite(n, sizeof(char)*strlen(n), 1, f);
11189 } else {
11190 fprintf(f, "%lu", (long unsigned int) gsc_get_id(loc).id);
11191 }
11192
11193 // Parents
11194 for (int parent = 0; parent < 2; ++parent) {
11195 fwrite("\t", sizeof(char), 1, f);
11196 n = NULL;
11197 gsc_PedigreeID p = (parent == 0) ? gsc_get_first_parent(loc) : gsc_get_second_parent(loc);
11198 if (p.id != GSC_NO_PEDIGREE.id && parent_pedigree_store != NULL) {
11199 n = gsc_get_name_of_id(parent_pedigree_store, p);
11200 }
11201 if (n != NULL) {
11202 fwrite(n, sizeof(char)*strlen(n), 1, f);
11203 } else if (p.id != NO_PEDIGREE.id) {
11204 fprintf(f, "%lu", (long unsigned int) p.id);
11205 }
11206 }
11207
11208 fwrite("\n", sizeof(char), 1, f);
11209 loc = gsc_next_forwards(targets);
11210 }
11211
11212 break;
11213 case 1:
11215 while (IS_VALID_LOCATION(loc)) {
11216 // Offspring
11217 fprintf(f, "%lu\t", (long unsigned int) gsc_get_id(loc).id);
11218 char* n = gsc_get_name(loc);
11219 if (n != NULL) {
11220 fwrite(n, sizeof(char)*strlen(n), 1, f);
11221 }
11222
11223 // Parents (recursively)
11224 if ((gsc_get_first_parent(loc).id != GSC_NO_PEDIGREE.id ||
11226 && parent_pedigree_store != NULL) {
11227 gsc_scaffold_save_ancestry_of(parent_pedigree_store,
11230 }
11231
11232 fwrite("\n", sizeof(char), 1, f);
11233 loc = gsc_next_forwards(targets);
11234 }
11235
11236 break;
11237 }
11238
11239 fflush(f);
11240 return;
11241}
11242
11264 gsc_BidirectionalIterator* targets,
11265 const gsc_MarkerEffects* eff) {
11266 if (targets == NULL || eff == NULL) { return; }
11267
11268 gsc_DecimalMatrix bvs = gsc_calculate_utility_bvs(targets, eff);
11270
11271 for (size_t i = 0; i < bvs.dim2; ++i) {
11272 if (IS_VALID_LOCATION(loc)) {
11273 fprintf(f, "%lu", (long unsigned int) gsc_get_id(loc).id);
11274 fwrite("\t", sizeof(char), 1, f);
11275 char* n = gsc_get_name(loc);
11276 if (n != NULL) {
11277 fwrite(n, sizeof(char), strlen(n), f);
11278 }
11279 fwrite("\t", sizeof(char), 1, f);
11280 } else {
11281 fwrite("\t\t", sizeof(char)*2, 1, f);
11282 }
11283
11284 fprintf(f, "%lf", bvs.matrix[0][i]);
11285 fwrite("\n", sizeof(char), 1, f);
11286
11287 loc = gsc_next_forwards(targets);
11288 }
11289
11290 gsc_delete_dmatrix(&bvs);
11291 fflush(f);
11292 return;
11293}
11294
11312 DecimalMatrix* dec,
11313 char** row_headers,
11314 char** col_headers,
11315 _Bool dim1_is_columns) {
11316 if (dec == NULL || dec->dim1 == 0 || dec->dim2 == 0) { return; }
11317
11318 if (col_headers) {
11319 size_t ncols = (dim1_is_columns) ? dec->dim1 : dec->dim2;
11320 fwrite(col_headers[0], sizeof(char), strlen(col_headers[0]), f);
11321 for (size_t col = 1; col < ncols; ++col) {
11322 fwrite("\t", sizeof(char), 1, f);
11323 fwrite(col_headers[col], sizeof(char), strlen(col_headers[col]), f);
11324 }
11325 fwrite("\n", sizeof(char), 1, f);
11326 }
11327
11328 if (dim1_is_columns) {
11329 for (size_t row = 0; row < dec->dim2; ++row) {
11330 if (row_headers) {
11331 fwrite(row_headers[row], sizeof(char), strlen(row_headers[row]), f);
11332 fwrite("\t", sizeof(char), 1, f);
11333 }
11334
11335 fprintf(f,"%lf",dec->matrix[0][row]);
11336 for (size_t col = 1; col < dec->dim1; ++col) {
11337 fwrite("\t", sizeof(char), 1, f);
11338 fprintf(f,"%lf",dec->matrix[col][row]);
11339 }
11340 fwrite("\n", sizeof(char), 1, f);
11341 }
11342 } else {
11343 for (size_t row = 0; row < dec->dim1; ++row) {
11344 if (row_headers) {
11345 fwrite(row_headers[row], sizeof(char), strlen(row_headers[row]), f);
11346 fwrite("\t", sizeof(char), 1, f);
11347 }
11348
11349 fprintf(f,"%lf",dec->matrix[row][0]);
11350 for (size_t col = 1; col < dec->dim2; ++col) {
11351 fwrite("\t", sizeof(char), 1, f);
11352 fprintf(f,"%lf",dec->matrix[row][col]);
11353 }
11354 fwrite("\n", sizeof(char), 1, f);
11355 }
11356 }
11357
11358 fflush(f);
11359 return;
11360}
11361
11362
11363#endif
double gsc_calculate_optimal_possible_bv(const gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID)
Calculates the breeding value of the highest breeding-value genotype that can be created from the all...
gsc_DecimalMatrix gsc_calculate_utility_local_bvs(gsc_BidirectionalIterator *targets, gsc_MarkerBlocks b, gsc_MarkerEffects e)
Calculate local haplotype block breeding values for a set of genotypes.
gsc_DecimalMatrix gsc_calculate_allele_counts(const gsc_SimData *d, const gsc_GroupNum group, const char allele)
Calculates the number of times at each marker that a particular allele appears.
gsc_DecimalMatrix gsc_calculate_bvs(const gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID)
Calculate the fitness metric/breeding value for every genotype in the simulation or every genotype in...
gsc_MarkerBlocks gsc_create_evenlength_blocks_each_chr(const gsc_SimData *d, const gsc_MapID mapid, const unsigned int n)
Divide the genotype into blocks where each block contains all markers within a 1/n length section of ...
gsc_GroupNum gsc_split_by_bv(gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID, const unsigned int top_n, const _Bool lowIsBest)
Takes the top_n individuals in the group with the best breeding values/fitnesses and puts them in a n...
void gsc_calculate_optimal_haplotype(const gsc_SimData *d, const gsc_EffectID effID, const char symbol_na, char *opt_haplotype)
Create a string containing the allele at each marker with the highest contributions to the additive b...
gsc_DecimalMatrix gsc_calculate_utility_bvs(gsc_BidirectionalIterator *targets, const gsc_MarkerEffects *effset)
Calculate the fitness metric/breeding value for a set of genotypes.
void gsc_calculate_optimal_possible_haplotype(const gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID, const char symbol_na, char *opt_haplotype)
Calculates the highest-breeding-value haplotype that can be created from the alleles present in a giv...
gsc_DecimalMatrix gsc_calculate_local_bvs(const gsc_SimData *d, const gsc_GroupNum group, const gsc_MarkerBlocks b, const gsc_EffectID effID)
Calculate local breeding values for every genotype in the simulation or every genotype in a certain g...
double gsc_calculate_minimal_bv(const gsc_SimData *d, const gsc_EffectID effID)
Calculate the lowest possible breeding value any (diploid) genotype could score using this set of mar...
gsc_MarkerBlocks gsc_load_blocks(const gsc_SimData *d, const char *block_file)
Given a file containing definitions of blocks of markers, process that file and return a struct conta...
double gsc_calculate_optimal_bv(const gsc_SimData *d, const gsc_EffectID effID)
Calculate the highest possible breeding value any (diploid) genotype could score using this set of ma...
unsigned int gsc_get_group_genes(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, char **output)
Gets a shallow copy of the genes/alleles of each member of the group.
unsigned int gsc_get_group_parent_ids(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, const int whichParent, gsc_PedigreeID *output)
Gets the ids of either the first or second parent of each member of the group.
unsigned int gsc_get_group_parent_names(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, const int whichParent, char **output)
Gets the names of either the first or second parent of each member of the group.
unsigned int gsc_get_group_pedigrees(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, char **output)
Gets the full pedigree string (as per gsc_save_group_full_pedigree() ) of each member of the group.
unsigned int gsc_get_group_names(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, char **output)
Gets a shallow copy of the names of each member of the group.
unsigned int gsc_get_group_indexes(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, unsigned int *output)
Gets the 0-based global indexes of each member of the group.
unsigned int gsc_get_group_ids(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, gsc_PedigreeID *output)
Gets the ids of each member of the group.
unsigned int gsc_get_group_bvs(const gsc_SimData *d, const gsc_GroupNum group_id, const gsc_EffectID effID, unsigned int group_size, double *output)
Gets the breeding values/fitnesses of each member of the group.
size_t gsc_get_existing_group_counts(gsc_SimData *d, gsc_GroupNum *out_groups, unsigned int *out_sizes)
Identify group numbers that currently have members, and how many members they have.
size_t gsc_get_existing_groups(gsc_SimData *d, gsc_GroupNum *output)
Identify group numbers that currently have members.
unsigned int gsc_get_group_size(const gsc_SimData *d, const gsc_GroupNum group_id)
Function to count the number of genotypes that currently belong to the specified group.
gsc_GroupNum gsc_make_double_crosses_from_file(gsc_SimData *d, const char *input_file, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Perform crosses between previously-generated offspring of pairs of parents identified by name in a fi...
gsc_GroupNum gsc_make_random_crosses(gsc_SimData *d, const gsc_GroupNum from_group, const unsigned int n_crosses, const unsigned int cap, const gsc_MapID which_map, const gsc_GenOptions g)
Performs random crosses among members of a group.
gsc_GroupNum gsc_make_crosses_from_file(gsc_SimData *d, const char *input_file, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Perform crosses between pairs of parents identified by name in a file and allocate the resulting offs...
gsc_GroupNum gsc_make_doubled_haploids(gsc_SimData *d, const gsc_GroupNum group, const gsc_MapID which_map, const gsc_GenOptions g)
Creates a doubled haploid from each member of a group.
gsc_GroupNum gsc_make_clones(gsc_SimData *d, const gsc_GroupNum group, const _Bool inherit_names, gsc_GenOptions g)
Creates an identical copy of each member of a group.
gsc_GroupNum gsc_make_random_crosses_between(gsc_SimData *d, const gsc_GroupNum group1, const gsc_GroupNum group2, const unsigned int n_crosses, const unsigned int cap1, const unsigned int cap2, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Performs random crosses where the first parent comes from one group and the second from another.
gsc_GroupNum gsc_scaffold_make_new_genotypes(gsc_SimData *d, const gsc_GenOptions g, void *parentIterator, union gsc_datastore_make_genotypes *datastore, int(*parentChooser)(void *, union gsc_datastore_make_genotypes *, unsigned int *, gsc_ParentChoice[static 2]), void(*offspringGenerator)(gsc_SimData *, union gsc_datastore_make_genotypes *, gsc_ParentChoice[static 2], gsc_GenoLocation))
Make new genotypes (generic function)
gsc_GroupNum gsc_make_all_unidirectional_crosses(gsc_SimData *d, const gsc_GroupNum from_group, const gsc_MapID mapID, const gsc_GenOptions g)
Perform crosses between all pairs of parents in the group from_group and allocates the resulting offs...
gsc_GroupNum gsc_self_n_times(gsc_SimData *d, const unsigned int n, const gsc_GroupNum group, const gsc_MapID which_map, const gsc_GenOptions g)
Selfs each member of a group for a certain number of generations.
gsc_GroupNum gsc_make_targeted_crosses(gsc_SimData *d, const size_t n_combinations, const unsigned int *firstParents, const unsigned int *secondParents, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Performs the crosses of pairs of parents whose indexes are provided in an array.
void gsc_delete_label(gsc_SimData *d, const gsc_LabelID which_label)
Clears memory of this label from the simulation and all its genotypes.
void gsc_delete_simdata(gsc_SimData *m)
Deletes a gsc_SimData object and frees its memory.
void gsc_delete_eff_set(gsc_SimData *d, gsc_EffectID effID)
Deletes a particular set of marker effects from memory.
void gsc_delete_effects_table(gsc_MarkerEffects *m)
Deletes a gsc_MarkerEffects object and frees its memory.
void gsc_delete_randomaccess_iter(gsc_RandomAccessIterator *it)
Deletes a gsc_RandomAccessIterator object and frees its memory.
void gsc_delete_recombination_map(gsc_SimData *d, const gsc_MapID which_map)
Deletes a particular recombination map from memory.
void gsc_delete_bidirectional_iter(gsc_BidirectionalIterator *it)
Deletes a gsc_BidirectionalIterator object.
void gsc_delete_markerblocks(gsc_MarkerBlocks *b)
Delete a gsc_MarkerBlocks struct.
void gsc_delete_dmatrix(gsc_DecimalMatrix *m)
Deletes a gsc_DecimalMatrix and frees its memory.
void gsc_delete_allele_matrix(gsc_AlleleMatrix *m)
Deletes the gsc_AlleleMatrix linked list from m onwards and frees its memory.
void gsc_delete_recombination_map_nointegrity(gsc_RecombinationMap *m)
Deletes and clears the memory of a gsc_RecombinationMap struct.
void gsc_delete_group(gsc_SimData *d, const gsc_GroupNum group_id)
Deletes all genotypes belonging to a particular group.
void gsc_move_genotype(gsc_GenoLocation from, gsc_GenoLocation to, int *label_defaults)
Move all details of the genotype at one gsc_GenoLocation to another gsc_GenoLocation.
void gsc_delete_genome(gsc_KnownGenome *g)
Deletes and clears the memory of a gsc_KnownGenome object and its children.
size_t gsc_split_into_buckets(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, const unsigned int *counts, gsc_GroupNum *results)
Split a group into n groups of equal size (or size differing only by one, if n does not perfectly div...
size_t gsc_split_by_probabilities(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, const double *probs, gsc_GroupNum *results)
Allocate each member of the group to one of n groups with custom probabilities for each group.
size_t gsc_scaffold_split_by_somequality(gsc_SimData *d, const gsc_GroupNum group_id, void *somequality_data, gsc_GroupNum(*somequality_tester)(gsc_GenoLocation, void *, size_t, size_t, gsc_GroupNum *), size_t maxentries_results, gsc_GroupNum *results)
Split by some quality (generic function)
gsc_GroupNum gsc_split_evenly_into_two(gsc_SimData *d, const gsc_GroupNum group_id)
Split a group into two groups of equal size (or size differing only by one, if the original group had...
gsc_GroupNum gsc_split_by_label_range(gsc_SimData *d, const gsc_GroupNum group, const gsc_LabelID whichLabel, const int valueLowBound, const int valueHighBound)
Allocates the genotypes with values of a label in a particular range to a new group.
size_t gsc_split_into_individuals(gsc_SimData *d, const gsc_GroupNum group_id, size_t maxentries_results, gsc_GroupNum *results)
Split a group into n one-member groups.
size_t gsc_split_into_halfsib_families(gsc_SimData *d, const gsc_GroupNum group_id, const int parent, size_t maxentries_results, gsc_GroupNum *results)
Split a group into families of half-siblings by shared first or second parent.
size_t gsc_split_evenly_into_n(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, gsc_GroupNum *results)
Split a group into n groups of equal size (or size differing only by one, if n does not perfectly div...
size_t gsc_scaffold_split_by_someallocation(gsc_SimData *d, const gsc_GroupNum group_id, void *someallocator_data, gsc_GroupNum(*someallocator)(gsc_GenoLocation, gsc_SimData *, void *, size_t, size_t *, gsc_GroupNum *), size_t n_outgroups, gsc_GroupNum *outgroups)
Split by some allocator (generic function)
gsc_GroupNum gsc_combine_groups(gsc_SimData *d, const size_t list_len, const gsc_GroupNum *grouplist)
Combine a set of groups into one group.
gsc_GroupNum gsc_split_by_label_value(gsc_SimData *d, const gsc_GroupNum group, const gsc_LabelID whichLabel, const int valueToSplit)
Allocates the genotypes with a particular value of a label to a new group.
gsc_GroupNum gsc_split_randomly_into_two(gsc_SimData *d, const gsc_GroupNum group_id)
Flip a coin for each member of the group to decide if it should be moved to the new group.
gsc_GroupNum gsc_make_group_from(gsc_SimData *d, const size_t index_list_len, const unsigned int *genotype_indexes)
Take a list of indexes and allocate the genotypes at those indexes to a new group.
size_t gsc_split_randomly_into_n(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, gsc_GroupNum *results)
Allocate each member of the group to one of n groups with equal probability.
size_t gsc_split_into_families(gsc_SimData *d, const gsc_GroupNum group_id, size_t maxentries_results, gsc_GroupNum *results)
Split a group into families by their pedigrees.
gsc_BidirectionalIterator gsc_create_bidirectional_iter(gsc_SimData *d, const gsc_GroupNum group)
Create a bidirectional iterator.
gsc_AlleleMatrix * gsc_get_nth_AlleleMatrix(gsc_AlleleMatrix *listStart, const unsigned int n)
Get a gsc_AlleleMatrix by index in the linked list.
#define GSC_INVALID_GENO_LOCATION
Constant representing a nonexistent location in the simulation.
gsc_BidirectionalIterator gsc_create_bidirectional_iter_fromAM(gsc_AlleleMatrix *am, const gsc_GroupNum group)
gsc_GenoLocation gsc_next_forwards(gsc_BidirectionalIterator *it)
Get the next location from a bidirectional iterator.
gsc_GenoLocation gsc_set_bidirectional_iter_to_end(gsc_BidirectionalIterator *it)
Initialise a Bidirectional iterator to the end of its sequence.
gsc_RandomAccessIterator gsc_create_randomaccess_iter(gsc_SimData *d, const gsc_GroupNum group)
Create a Random Access Iterator.
gsc_GenoLocation gsc_set_bidirectional_iter_to_start(gsc_BidirectionalIterator *it)
Initialise a Bidirectional iterator to the start of its sequence.
gsc_GenoLocation gsc_next_backwards(gsc_BidirectionalIterator *it)
Get the previous location from a bidirectional iterator.
gsc_GenoLocation gsc_next_get_nth(gsc_RandomAccessIterator *it, const unsigned int n)
Get a location by index using a gsc_RandomAccessIterator.
#define GSC_IS_VALID_LOCATION(g)
Check whether a GenoLocation is valid (i.e. is not INVALID_GENO_LOCATION).
static gsc_PedigreeID gsc_get_id(const gsc_GenoLocation loc)
Get the persistent id of a genotype.
static char * gsc_get_name(const gsc_GenoLocation loc)
Get the name of a genotype.
static int gsc_get_label_value(const gsc_GenoLocation loc, const int labelIndex)
Get the value of a specific label of a genotype.
static char * gsc_get_alleles(const gsc_GenoLocation loc)
Get the alleles of a genotype.
static void gsc_set_group(const gsc_GenoLocation loc, const gsc_GroupNum group)
Set the current group membership of a genotype.
static gsc_PedigreeID gsc_get_first_parent(const gsc_GenoLocation loc)
Get the first/left parent of a genotype.
static gsc_PedigreeID gsc_get_second_parent(const gsc_GenoLocation loc)
Get the second/right parent of a genotype.
static void gsc_set_name(const gsc_GenoLocation loc, char *name)
Set the name of a genotype.
static gsc_GroupNum gsc_get_group(const gsc_GenoLocation loc)
Get the current group membership of a genotype.
gsc_AlleleMatrix * gsc_create_empty_allelematrix(const unsigned int n_markers, const unsigned int n_labels, const int *labelDefaults, const unsigned int n_genotypes)
Creator for an empty gsc_AlleleMatrix object of a given size.
gsc_EffectID gsc_load_effectfile(gsc_SimData *d, const char *filename)
Populates a gsc_SimData combination with effect values.
struct gsc_MultiIDSet gsc_load_data_files(gsc_SimData *d, const char *genotype_file, const char *map_file, const char *effect_file, const gsc_FileFormatSpec format)
Populates a gsc_SimData object with marker allele data, a genetic map, and (optionally) marker effect...
gsc_GroupNum gsc_load_genotypefile(gsc_SimData *d, const char *filename, const gsc_FileFormatSpec format)
Load a set of genotypes to a gsc_SimData object.
gsc_MapID gsc_create_uniformspaced_recombmap(gsc_SimData *d, unsigned int n_markers, char **markernames, double expected_n_recombinations)
Create a uniformly-spaced gsc_RecombinationMap from a list of marker names and save to SimData.
void gsc_clear_simdata(gsc_SimData *d)
Clear a gsc_SimData object on the heap.
gsc_MapID gsc_create_unlinked_recombmap(gsc_SimData *d, unsigned int n_markers, char **markernames)
Create a gsc_RecombinationMap with independent assortment of alleles across a list of marker names,...
gsc_SimData * gsc_create_empty_simdata(unsigned int RNGseed)
Creator for an empty gsc_SimData object on the heap.
gsc_MapID gsc_load_mapfile(gsc_SimData *d, const char *filename)
Load a genetic map to a gsc_SimData object.
gsc_MapID gsc_create_recombmap_from_markerlist(gsc_SimData *d, unsigned int n_markers, struct gsc_MapfileUnit *markerlist)
Parse a list of markers/chrs/positions into a gsc_RecombinationMap and save to SimData.
int gsc_randpoi(rnd_pcg_t *rng, double lambda)
Randomly generates a number from the Poisson distribution with parameter lambda, using the Knuth appr...
gsc_DecimalMatrix gsc_generate_zero_dmatrix(const size_t r, const size_t c)
Generates a matrix of c columns, r rows with all 0.
void gsc_generate_clone(gsc_SimData *d, const char *parent_genome, char *output)
Get an identical copy of a given genotype.
void gsc_generate_doubled_haploid(gsc_SimData *d, const char *parent_genome, char *output, const unsigned int map_index)
Get the alleles of the outcome of producing a doubled haploid from a gamete from a given parent.
void gsc_generate_gamete(gsc_SimData *d, const char *parent_genome, char *output, const unsigned int map_index)
Fills a char* with the simulated result of meiosis (reduction and recombination) from the marker alle...
static int gsc_has_same_alleles_window(const char *g1, const char *g2, const size_t start, const size_t w)
Simple operator to determine if at markers with indexes i to i+w inclusive, two genotypes share at le...
int gsc_calculate_recombinations_from_file(gsc_SimData *d, const char *input_file, const char *output_file, int window_len, int certain)
Provides guesses as to the location of recombination events that led to the creation of certain genot...
int * gsc_calculate_min_recombinations_fw1(gsc_SimData *d, gsc_MapID mapid, char *parent1, unsigned int p1num, char *parent2, unsigned int p2num, char *offspring, int certain)
Identify markers in the genotype of offspring where recombination from its parents occurred.
static int gsc_has_same_alleles(const char *p1, const char *p2, const size_t i)
Simple operator to determine if at marker i, two genotypes share at least one allele.
int * gsc_calculate_min_recombinations_fwn(gsc_SimData *d, gsc_MapID mapid, char *parent1, unsigned int p1num, char *parent2, unsigned int p2num, char *offspring, int window_size, int certain)
Identify markers in the genotype of offspring where recombination from its parents occurred,...
void gsc_save_utility_markerblocks(FILE *f, const gsc_MarkerBlocks b, const unsigned int n_markers, char **const marker_names, const gsc_RecombinationMap *map)
Prints the markers contained in a set of blocks to a file.
void gsc_save_utility_dmatrix(FILE *f, gsc_DecimalMatrix *dec, char **row_headers, char **col_headers, _Bool dim1_is_columns)
Output the contents of a matrix to a file.
void gsc_save_utility_allele_counts(FILE *f, gsc_BidirectionalIterator *targets, unsigned int n_markers, char **const marker_names, const _Bool markers_as_rows, const char allele)
Prints allele counts of simulated genotypes to a file.
void gsc_save_utility_pedigrees(FILE *f, gsc_BidirectionalIterator *targets, const _Bool full_pedigree, const gsc_AlleleMatrix *parent_pedigree_store)
Prints pedigrees to a file.
void gsc_save_markerblocks(const char *fname, const gsc_SimData *d, const gsc_MarkerBlocks b, const gsc_MapID labelMapID)
Prints the markers contained in a set of blocks to a file.
void gsc_save_bvs(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const gsc_EffectID effID)
Prints breeding values of genotypes in the simulation to a file.
void gsc_save_local_bvs(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const gsc_MarkerBlocks b, const gsc_EffectID effID, const _Bool headers)
Prints local breeding values of candidates to a file.
void gsc_save_allele_counts(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const char allele, const _Bool markers_as_rows)
Prints allele counts of genotypes from the simulation to a file.
void gsc_save_genotypes(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const _Bool markers_as_rows)
Prints genotypes from the simulation to a file.
void gsc_save_utility_bvs(FILE *f, gsc_BidirectionalIterator *targets, const gsc_MarkerEffects *eff)
Calculate and print breeding values to a file.
void gsc_save_utility_genotypes(FILE *f, gsc_BidirectionalIterator *targets, unsigned int n_markers, char **const marker_names, const _Bool markers_as_rows)
Prints simulated genotypes to a file.
void gsc_save_pedigrees(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const _Bool full_pedigree)
Prints pedigrees of genotypes in the simulation to a file.
char * gsc_get_genes_of_index(const gsc_AlleleMatrix *start, const unsigned int index)
Get the alleles of a genotype by its index.
char * gsc_get_name_of_id(const gsc_AlleleMatrix *start, const gsc_PedigreeID id)
Returns the name of the genotype with a given id.
void gsc_get_ids_of_names(const gsc_AlleleMatrix *start, const size_t n_names, const char **names, gsc_PedigreeID *output)
Search for genotypes with certain names in a linked list of gsc_AlleleMatrix and save the ids of thos...
unsigned int gsc_get_index_of_child(const gsc_AlleleMatrix *start, const gsc_PedigreeID parent1id, const gsc_PedigreeID parent2id)
Search for a genotype with parentage matching two given parent ids in a linked list of gsc_AlleleMatr...
unsigned int gsc_get_index_of_name(const gsc_AlleleMatrix *start, const char *name)
Search for a genotype with a particular name in a linked list of gsc_AlleleMatrix,...
gsc_PedigreeID gsc_get_id_of_index(const gsc_AlleleMatrix *start, const unsigned int index)
Get the id of a genotype by its index.
int gsc_get_parents_of_id(const gsc_AlleleMatrix *start, const gsc_PedigreeID id, gsc_PedigreeID output[static 2])
Saves the ids of the parents of a genotype with a particular id to the output array output.
#define AlleleMatrix
#define NO_EFFECTSET
#define NO_MAP
#define get_alleles
#define delete_bidirectional_iter
#define NA
#define IS_VALID_LOCATION
#define INVALID_GENO_LOCATION
#define GenoLocation
#define NO_PEDIGREE
#define RandomAccessIterator
#define NO_GROUP
#define SimData
#define DecimalMatrix
#define FileFormatSpec
#define RecombinationMap
#define GSC_FINALISE_BUFFER(n, as, nentries)
Macro to convert a stretchy buffer to a solid heap vector.
gsc_GenotypeFileCellStyle
Represent possible representations of alleles at a marker in a genotype file.
gsc_TableFileReader gsc_tablefilereader_create(const char *filename)
Open a file for reading with gsc_TableFileReader.
#define GSC_NO_LABEL
Empty/null value for custom label identifiers.
#define GSC_STRETCH_BUFFER(n, newlen)
Macro to expand the capacity of a stretchy buffer.
#define GSC_NO_MAP
Empty/null value for recombination map identifiers.
void gsc_tablefilecell_deep_copy(gsc_TableFileCell *c)
Allocate memory to store a deep copy of a gsc_TableFileCell, if previously only a shallow copy.
void gsc_tablefilereader_close(gsc_TableFileReader *tbl)
Close a gsc_TableFileReader's file pointer.
#define GSC_DELETE_BUFFER(n)
Macro to delete a stretchy buffer.
#define GSC_NO_EFFECTSET
Empty/null value for effect set identifiers.
enum gsc_TableFileCurrentStatus gsc_helper_tablefilereader_classify_char(gsc_TableFileReader *tbl)
Classify the character under the cursor of a TableFileReader as cell contents or otherwise.
gsc_TableFileCell gsc_tablefilereader_get_next_cell(gsc_TableFileReader *tbl)
Read forwards in TableFileReader and return the next cell's contents, as well as how many column gaps...
gsc_TableFileCurrentStatus
Represent possible states of the cursor of a gsc_TableFileReader.
#define GSC_CREATE_BUFFER(n, type, length)
Macro to create a stretchy buffer of any type and some length.
const gsc_GenOptions GSC_BASIC_OPT
Default parameter values for GenOptions, to help with quick scripts and prototypes.
gsc_FileFormatSpec gsc_define_matrix_format_details(const GSC_LOGICVAL has_header, const GSC_LOGICVAL markers_as_rows, const enum gsc_GenotypeFileCellStyle cell_style)
Give genomicSimulation hints on the format of a genotype matrix file to be loaded.
void gsc_helper_tablefilereader_refill_buffer(gsc_TableFileReader *tbl)
Read another buffer's worth of characters from a gsc_TableFileReader's file.
#define GSC_NO_PEDIGREE
Empty/null value for pedigree fields.
gsc_GenotypeFileType
Enumerate types of genotype files that the simulation knows how to load.
#define GSC_NO_GROUP
Empty/null value for group allocations.
@ GSC_GENOTYPECELLSTYLE_SLASHPAIR
@ GSC_GENOTYPECELLSTYLE_PAIR
@ GSC_GENOTYPECELLSTYLE_UNKNOWN
@ GSC_GENOTYPECELLSTYLE_ENCODED
@ GSC_GENOTYPECELLSTYLE_COUNT
@ GSC_TABLEFILE_ERROR_EOF
@ GSC_TABLEFILE_ERROR_EOBUF
@ GSC_TABLEFILE_COLUMNGAP
@ GSC_TABLEFILE_NEWLINE
@ GSC_TABLEFILE_CONTENTS
@ GSC_GENOTYPEFILE_MATRIX
Either a marker-by-line matrix, where each marker is a row, or a line-by-marker matrix,...
@ GSC_GENOTYPEFILE_UNKNOWN
@ GSC_GENOTYPEFILE_VCF
@ GSC_GENOTYPEFILE_BED
@ GSC_GENOTYPEFILE_PED
gsc_GroupNum gsc_get_next_free_group_num(const size_t n_existing_groups, const gsc_GroupNum *existing_groups, size_t *cursor, gsc_GroupNum previous)
Iterator to get the next currently-free group number.
unsigned int gsc_get_from_ordered_pedigree_list(const gsc_PedigreeID target, const unsigned int listLen, const gsc_PedigreeID *list)
Binary search through list of unsigned integers.
gsc_GroupNum gsc_get_new_group_num(gsc_SimData *d)
Function to identify the next sequential integer that does not identify a group that currently has me...
void gsc_change_label_to(gsc_SimData *d, const gsc_GroupNum whichGroup, const gsc_LabelID whichLabel, const int setTo)
Set the values of a custom label.
struct gsc_TableSize gsc_get_file_dimensions(const char *filename, const char sep)
Opens a table file and reads the number of columns and rows (including headers) separated by sep into...
int gsc_get_integer_digits(const int i)
Count and return the number of digits in i.
unsigned int gsc_get_index_of_map(const gsc_SimData *d, const gsc_MapID map)
Function to identify the lookup index of a recombination map identifier.
unsigned int gsc_get_index_of_label(const gsc_SimData *d, const gsc_LabelID label)
Function to identify the label lookup index of a label identifier.
gsc_MapID gsc_get_new_map_id(const gsc_SimData *d)
Function to identify the next sequential integer that is not already allocated to a map ID in the sim...
void gsc_shuffle_up_to(rnd_pcg_t *rng, void *sequence, const size_t item_size, const size_t total_n, const size_t n_to_shuffle)
Produce a random ordering of the first n elements in an array using a (partial) Fisher-Yates shuffle.
void gsc_change_label_default(gsc_SimData *d, const gsc_LabelID whichLabel, const int newDefault)
Set the default value of a custom label.
size_t gsc_get_from_ordered_str_list(const char *target, const size_t listLen, const char **list)
Binary search through a list of strings.
unsigned int gsc_change_eff_set_centre_of_allele_count(gsc_SimData *d, const gsc_EffectID effset, const unsigned int n_markers, const char **marker_names, const double *centres, const char allele, const _Bool reset_centres)
Replace the centring values of specific markers in an effect set.
_Bool gsc_change_eff_set_centres_to_values(gsc_SimData *d, const gsc_EffectID effset, const unsigned int n_values, const double *values)
Replace the centring values of all markers in an effect set.
size_t gsc_get_from_unordered_str_list(const char *target, const size_t listLen, const char **list)
Linear search through a list of strings.
unsigned int gsc_change_eff_set_centre_of_markers(gsc_SimData *d, const gsc_EffectID effset, const unsigned int n_markers, const char **marker_names, const double *centres)
Replace the centring values of specific markers in an effect set.
gsc_EffectID gsc_get_new_eff_set_id(const gsc_SimData *d)
Function to identify the next sequential integer that is not already allocated to a marker effect set...
void gsc_get_n_new_group_nums(gsc_SimData *d, const size_t n, gsc_GroupNum *result)
Function to identify the next n sequential integers that do not identify a group that currently has m...
void gsc_change_label_to_values(gsc_SimData *d, const gsc_GroupNum whichGroup, const unsigned int startIndex, const gsc_LabelID whichLabel, const size_t n_values, const int *values)
Copy a vector of integers into a custom label.
gsc_LabelID gsc_get_new_label_id(const gsc_SimData *d)
Function to identify the next sequential integer that is not already allocated to a label in the simu...
unsigned int gsc_get_index_of_eff_set(const gsc_SimData *d, const gsc_EffectID eff_set_id)
Function to identify the lookup index of a marker effect set identifier.
gsc_LabelID gsc_create_new_label(gsc_SimData *d, const int setTo)
Initialises a new custom label.
void gsc_change_names_to_values(gsc_SimData *d, const gsc_GroupNum whichGroup, const unsigned int startIndex, const size_t n_values, const char **values)
Copy a vector of strings into the genotype name field.
_Bool gsc_get_index_of_genetic_marker(const char *target, gsc_KnownGenome g, unsigned int *out)
Return whether or not a marker name is present in the tracked markers, and at what index.
void gsc_change_allele_symbol(gsc_SimData *d, const char *which_marker, const char from, const char to)
Replace all occurrences of a given allele with a different symbol representation.
void gsc_change_label_by_amount(gsc_SimData *d, const gsc_GroupNum whichGroup, const gsc_LabelID whichLabel, const int byValue)
Increment the values of a custom label.
void gsc_condense_allele_matrix(gsc_SimData *d)
A function to tidy the internal storage of genotypes after addition or deletion of genotypes in the g...
unsigned int gsc_randomdraw_replacementrules(gsc_SimData *d, unsigned int max, unsigned int cap, unsigned int *member_uses, unsigned int noCollision)
Randomly pick a number in a range, optionally with a cap on how many times a number can be picked,...
static gsc_GroupNum gsc_helper_split_by_allocator_knowncounts(gsc_GenoLocation loc, gsc_SimData *d, void *datastore, size_t n_outgroups, size_t *subgroupsfound, gsc_GroupNum *outgroups)
static void gsc_helper_genoptions_save_bvs(FILE *fe, gsc_MarkerEffects *effMatrices, unsigned int effIndex, gsc_AlleleMatrix *tosave)
save-as-you-go (breeding values)
static gsc_GroupNum gsc_helper_split_by_quality_halfsib2(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
static int gsc_helper_parentchooser_cross_randomly(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_random_crosses.
static void gsc_helper_genoptions_save_genotypes(FILE *fg, gsc_AlleleMatrix *tosave)
save-as-you-go (genotypes/alleles)
static struct gsc_EmptyListNavigator gsc_create_emptylistnavigator(gsc_SimData *d, gsc_GroupNum allocation_group)
Create a new gsc_EmptyListNavigator, including an empty AlleleMatrix suitable for inserting into the ...
static gsc_EffectID gsc_helper_insert_eff_set_into_simdata(gsc_SimData *d, gsc_MarkerEffects effset)
Save a set of MarkerEffects to the SimData and allocate it an EffectID.
static FILE * gsc_helper_genoptions_save_pedigrees_setup(const gsc_GenOptions g)
Opens file for writing save-as-you-go pedigrees in accordance with gsc_GenOptions.
static void gsc_helper_output_genotypematrix_cell(FILE *f, gsc_GenoLocation loc, unsigned int markerix, void *GSC_NA)
Kernel for gsc_scaffold_save_genotype_info, when the goal is to save the (phased) allele pairs of eac...
static void gsc_helper_make_offspring_doubled_haploids(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for gsc_make_doubled_haploids.
static int gsc_helper_parentchooser_cross_targeted(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_targeted_crosses.
static int gsc_helper_parentchooser_cross_randomly_between(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_random_crosses_between.
static FILE * gsc_helper_genoptions_save_genotypes_setup(const gsc_SimData *d, const gsc_GenOptions g)
Opens file for writing save-as-you-go genotypes in accordance with gsc_GenOptions.
static gsc_MapID gsc_helper_insert_recombmap_into_simdata(gsc_SimData *d, gsc_RecombinationMap map)
Save a RecombinationMap to the SimData and allocate it a mapID.
static void gsc_set_names(gsc_AlleleMatrix *a, const char *prefix, const int suffix, const unsigned int from_index)
Fills the designated section of the .names array in a gsc_AlleleMatrix with the pattern "`prefix`ind...
static struct gsc_GenotypeFile_MatrixFormat gsc_helper_genotypefile_matrix_detect_orientation(const gsc_SimData *d, const gsc_TableFileCell *cellqueue, const size_t firstrowlen, const size_t queuelen, struct gsc_GenotypeFile_MatrixFormat format, const char *filenameforlog)
Determine whether a genotype matrix is row- or column-oriented.
static void gsc_scaffold_save_genotype_info(FILE *f, gsc_BidirectionalIterator *targets, unsigned int n_markers, char **const marker_names, const _Bool markers_as_rows, void(*bodycell_printer)(FILE *, gsc_GenoLocation, unsigned int, void *), void *bodycell_printer_data)
Prints a matrix of genotype information to a file.
static int gsc_helper_ascending_pdouble_comparer(const void *pp0, const void *pp1)
Comparator function for qsort.
static struct gsc_GenotypeFile_MatrixFormat gsc_helper_genotypefile_matrix_detect_header(const gsc_TableFileCell *cellqueue, const size_t firstrowlen, const size_t queuelen, struct gsc_GenotypeFile_MatrixFormat format, const char *filenameforlog)
Determine whether a genotype matrix has a header row or not.
static void gsc_helper_make_offspring_self_n_times(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for gsc_self_n_times.
static GSC_LOGICVAL gsc_helper_parse_ncell_header(int ncellrow1, gsc_TableFileCell *unprocessedqueue, int ncell_required, const char **titles_required, int ncell_optional, const char **titles_optional, int *col_order)
Header row reading and processing for map and effect set files.
static int gsc_helper_descending_pdouble_comparer(const void *pp0, const void *pp1)
Comparator function for qsort.
static int gsc_helper_ascending_double_comparer(const void *pp0, const void *pp1)
Comparator function for qsort.
static void gsc_helper_ancestry_intprinter_file(long unsigned int i, void *data)
Kernel for scaffold functions that require printing an integer to a file (as opposed to saving the in...
static gsc_GenoLocation gsc_emptylistnavigator_get_first(struct gsc_EmptyListNavigator *it)
Reset the cursor of a gsc_EmptyListNavigator to the first genotype.
static void gsc_emptylistnavigator_finaliselist(struct gsc_EmptyListNavigator *it)
Push emptylist edited genotypes into the SimData.
static gsc_GenoLocation gsc_emptylistnavigator_get_next(struct gsc_EmptyListNavigator *it)
Get the next sequential genotype in a gsc_EmptyListNavigator.
static int gsc_helper_mapfileunit_ascending_d_comparer(const void *p0, const void *p1)
Comparator function for qsort.
static gsc_GroupNum gsc_helper_split_by_quality_individuate(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
static gsc_GenoLocation gsc_nextgappy_valid_pos(struct gsc_GappyIterator *it)
Sets the current cursor position in a gsc_GappyIterator to the next valid position,...
static gsc_GroupNum gsc_helper_split_by_quality_halfsib1(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
static GSC_LOGICVAL gsc_helper_is_marker_in_chr(const unsigned int markerix, const gsc_LinkageGroup chr, double *pos)
Check if a marker index is found in a particular LinkageGroup, and provide its distance along the chr...
static void gsc_helper_genotypecell_to_allelematrix(gsc_GenoLocation loc, unsigned int markerix, enum gsc_GenotypeFileCellStyle style, char *cell, gsc_SimData *forrng)
Parse a string and save it as the alleles of a genotype at a particular location and genetic marker.
static void gsc_scaffold_save_ancestry_of(const gsc_AlleleMatrix *m, gsc_PedigreeID p1, gsc_PedigreeID p2, void(*strprinter)(char *, size_t, void *), void(*intprinter)(long unsigned int, void *), void *printer_data)
Identifies and saves (recursively) the pedigree of a pair of parents.
static gsc_TableFileCell gsc_helper_tablefilereader_get_next_cell_wqueue(gsc_TableFileReader *tf, gsc_TableFileCell **queue, size_t *queuesize)
Return the next cell from a queue of cells until the queue is exhausted, and thereafter read new cell...
static int gsc_helper_parentchooser_cloning(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_clones.
static gsc_GroupNum gsc_helper_split_by_quality_halfsibtemplate(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results, gsc_PedigreeID(*getparent)(gsc_GenoLocation))
static gsc_GroupNum gsc_helper_split_by_allocator_equalprob(gsc_GenoLocation loc, gsc_SimData *d, void *datastore, size_t n_outgroups, size_t *subgroupsfound, gsc_GroupNum *outgroups)
static void gsc_helper_make_offspring_cross(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for all crossing functions
static void gsc_helper_genoptions_save_pedigrees(FILE *fp, gsc_SimData *d, gsc_AlleleMatrix *tosave)
save-as-you-go (pedigrees)
static int gsc_helper_mapfileunit_ascending_chr_comparer(const void *p0, const void *p1)
Comparator function for qsort.
static enum gsc_GenotypeFileCellStyle gsc_helper_genotype_matrix_identify_cell_style(gsc_TableFileCell c)
Identify what formatting a genotype matrix is representing alleles as.
static void * gsc_malloc_wrap(const size_t size, char exitonfail)
Replace direct calls to malloc with this function.
static int gsc_helper_parentchooser_selfing(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_self_n_times.
static void gsc_helper_make_offspring_clones(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for gsc_make_clones.
gsc_GroupNum gsc_make_n_crosses_from_top_m_percent(gsc_SimData *d, const int n, const int m, const gsc_GroupNum group, const gsc_MapID mapID, const gsc_EffectID effID, const gsc_GenOptions g)
static void gsc_helper_genoptions_give_names_and_ids(gsc_AlleleMatrix *am, gsc_SimData *d, const gsc_GenOptions g)
Apply gsc_GenOptions naming scheme and gsc_PedigreeID allocation to a single gsc_AlleleMatrix.
static FILE * gsc_helper_genoptions_save_bvs_setup(const gsc_SimData *d, const gsc_GenOptions g, unsigned int *effIndexp)
Opens file for writing save-as-you-go breeding values in accordance with gsc_GenOptions.
static int gsc_helper_read_first_row(gsc_TableFileReader *tf, int min_headerlen, int max_headerlen, gsc_TableFileCell *outputq, size_t *queuesize)
Save the first row of cells from a table file to a queue.
static void gsc_helper_sort_markerlist(unsigned int n_markers, struct gsc_MapfileUnit *markerlist)
Sort markerlist by chromosome name, and by position within each chromosome.
static struct gsc_GenotypeFile_MatrixFormat gsc_helper_genotypefile_matrix_detect_cellstyle(const gsc_TableFileCell *cellqueue, const size_t firstrowlen, const size_t queuelen, struct gsc_GenotypeFile_MatrixFormat format, const char *filenameforlog)
Determine the style in which alleles are stored in a genotype matrix.
static int gsc_helper_effectfileunit_ascending_mix_comparer(const void *p0, const void *p1)
Comparator function for qsort.
static void gsc_helper_output_countmatrix_cell(FILE *f, gsc_GenoLocation loc, unsigned int markerix, void *data)
Kernel for gsc_scaffold_save_genotype_info, when the goal is to save the allele counts of a particula...
static gsc_GroupNum gsc_load_genotypefile_matrix(gsc_SimData *d, const char *filename, const gsc_FileFormatSpec format)
Loads a genotype file, with or without existing genome model in the SimData.
static int gsc_helper_indirect_alphabetical_str_comparer(const void *p0, const void *p1)
Comparator function for qsort.
static unsigned int gsc_helper_str_markerlist_leftjoin(gsc_KnownGenome g, unsigned int n_markers_in_list, struct gsc_MapfileUnit **markerlist)
Discard markers whose names are not present in a gsc_KnownGenome.
static size_t gsc_helper_parse_mapfile(const char *filename, struct gsc_MapfileUnit **out)
Extract the contents of a genetic map file.
static gsc_GenoLocation gsc_nextgappy_get_gap(struct gsc_GappyIterator *it)
Sets the current cursor position in a gsc_GappyIterator to the next empty position,...
static GSC_LOGICVAL gsc_helper_genotypefile_matrix_detect_cornercell_presence(const size_t ncellsfirstrow, const size_t ncellssecondrow, const _Bool secondrowheaderisempty)
Determine whether a genotype matrix has a corner cell or not.
static unsigned int gsc_helper_random_cross_checks(gsc_SimData *d, const gsc_GroupNum from_group, const unsigned int n_crosses, const unsigned int cap)
Check input parameters of random crossing functions.
static gsc_GenoLocation gsc_nextgappy_get_nongap(struct gsc_GappyIterator *it)
Sets the current cursor position in a gsc_GappyIterator to the next filled position,...
static void gsc_helper_ancestry_strprinter_file(char *str, size_t strlen, void *data)
Kernel for scaffold functions that require printing a string to a file (as opposed to saving the stri...
static gsc_GroupNum gsc_helper_split_by_allocator_unequalprob(gsc_GenoLocation loc, gsc_SimData *d, void *datastore, size_t n_outgroups, size_t *subgroupsfound, gsc_GroupNum *outgroups)
static gsc_GroupNum gsc_helper_split_by_quality_family(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
#define GSC_ID_T
genomicSimulation's "ID" type
#define GSC_NA_IDX
When accessing the current array index of a unique session ID, the "ID not found"/failure value is -1...
#define GSC_GLOBALX_T
genomicSimulation's "Candidate global index" type
#define GSC_GENOLEN_T
genomicSimulation's "Genotype length" type
GSC_LOGICVAL
genomicSimulation's "logical value" type
@ GSC_TRUE
@ GSC_FALSE
@ GSC_NA
#define GSC_NA_LOCALX
For candidate local indexes, the INVALID/UNINITIALISED value is -1 (for signed types) or the maximum ...
#define GSC_LOCALX_T
genomicSimulation's "Candidate local index" type
#define GSC_MALLOC(size)
#define GSC_NA_ID
For unique session IDs, the INVALID/UNINITIALISED value is 0.
#define GSC_NA_GLOBALX
For candidate global indexes, the INVALID/UNINITIALISED value is -1 (for signed types) or the maximum...
#define GSC_FREE(ptr)
#define CONTIG_WIDTH
#define NAME_LENGTH
char * names[1000]
Array of dynamically allocated strings containing the names of the lines/genotypes in this matrix.
unsigned int n_genotypes
Number of genotypes currently loaded in this matrix.
unsigned int n_markers
Number of markers across which genotypes are tracked.
char * alleles[1000]
A matrix of SNP markers by lines/genotypes containing pairs of alleles eg TT, TA.
unsigned int n_labels
Number of custom labels currently available to this gsc_AlleleMatrix.
gsc_AlleleMatrix * next
Pointer to the next gsc_AlleleMatrix in the linked list, or NULL if this entry is the last.
int ** labels
Pointer to list of labels.
gsc_PedigreeID ids[1000]
Unique ID for each genotype.
gsc_GroupNum groups[1000]
Group allocation of each genotype.
gsc_PedigreeID pedigrees[2][1000]
Two lists of integer IDs of the parents of this genotype (if tracked), or 0 if we don't know/care.
A structure to iterate forwards and backwards through all genotypes in a gsc_SimData or through only ...
_Bool atEnd
Boolean that is TRUE if the iterator's 'cursor' is on the last genotype (genotype with the highest in...
unsigned int cachedAMIndex
Index of cachedAM in the linked list of gsc_AlleleMatrix beginning at d->m.
const gsc_GroupNum group
Group through which to iterate.
gsc_AlleleMatrix * cachedAM
Pointer to the gsc_AlleleMatrix from the linked list of gsc_AlleleMatrix beginning at d->m where the ...
_Bool atStart
Boolean that is TRUE if the iterator's 'cursor' is on the first genotype (genotype with the lowest in...
unsigned int localPos
Local index (index within the cachedAM) of the genotype in the linked list of gsc_AlleleMatrix beginn...
gsc_AlleleMatrix * am
Simulation genotypes through which to iterate.
A row-major heap matrix that contains floating point numbers.
size_t dim1
Number of rows in the matrix.
double ** matrix
The actual matrix and contents.
size_t dim2
Number of columns in the matrix.
A type representing a particular loaded set of marker effects.
unsigned int id
Unprocessed data for one marker effect loaded from an effect file.
unsigned int markerix
A structure to hold an initially empty AlleleMatrix list whose genotypes can be accessed sequentially...
gsc_PedigreeID currentid
gsc_AlleleMatrix * firstAM
gsc_AlleleMatrix * localAM
File format specifier for the genotype input file.
union gsc_FileFormatSpec::@7 spec
struct gsc_GenotypeFile_MatrixFormat matrix
enum gsc_GenotypeFileType filetype
A structure to iterate forwards through all positions in the gsc_AlleleMatrix linked list in gsc_SimD...
gsc_GenoLocation cursor
unsigned int cursorAMIndex
A type that contains choices of settings for gsc_SimData functions that create a new gsc_AlleleMatrix...
_Bool will_allocate_ids
A boolean: whether to allocate generated offspring session- unique IDs.
_Bool will_track_pedigree
A boolean: whether to track parentage of generated offspring.
_Bool will_name_offspring
A boolean: whether generated offspring should be given names.
const char * filename_prefix
A string used in save-as-you-go file names.
const char * offspring_name_prefix
If will_name_offspring is true, generated offspring are named with the concatenation {offspring_name_...
gsc_EffectID will_save_bvs_to_file
If equal to NO_EFFECTSET, no bvs are calculated or saved.
_Bool will_save_pedigree_to_file
A boolean.
unsigned int family_size
The number of offspring to produce from each cross.
_Bool will_save_to_simdata
A boolean.
_Bool will_save_alleles_to_file
A boolean.
A coordinate (gsc_AlleleMatrix, index within that gsc_AlleleMatrix) of a particular genotype in the simulation.
gsc_AlleleMatrix * localAM
Pointer to the gsc_AlleleMatrix in which the genotype can be found.
unsigned int localPos
Index in the localAM where the genotype can be found (min value: 0).
Variants in the format of a genotype matrix file.
enum gsc_GenotypeFileCellStyle cell_style
< Boolean: Are genetic markers the rows of the matrix (GSC_TRUE) or the columns of the matrix (GSC_FA...
GSC_LOGICVAL markers_as_rows
< Boolean: Is the first row of the file a header row? (Note: genotype matrix files must have row head...
A type representing the identifier of a group of genotypes.
unsigned int num
A type that stores the genome structure used in simulation.
char ** marker_names
A vector of n_markers strings containing the names of markers, ordered according to their index in an...
gsc_RecombinationMap * maps
A vector of n_maps recombination maps, to use for simulating meiosis.
char *** names_alphabetical
A vector of n_markers pointers to names in marker_names, ordered in alphabetical order of the names.
unsigned int n_markers
The total number of markers.
unsigned int n_maps
The number of recombination maps currently stored.
gsc_MapID * map_ids
A vector of n_maps identifiers for each of the recombination maps currently stored.
A type representing a particular custom label.
unsigned int id
A generic store for a linkage group, used to simulate meiosis on a certain subset of markers.
gsc_ReorderedLinkageGroup reorder
enum gsc_LinkageGroup::gsc_LinkageGroupType type
gsc_SimpleLinkageGroup simple
union gsc_LinkageGroup::@6 map
A type representing a particular loaded recombination map.
unsigned int id
Unprocessed data for one marker (linkage group and position) loaded from a map file.
A struct used to store a set of blocks of markers.
unsigned int num_blocks
The number of blocks whose details are stored here.
unsigned int * num_markers_in_block
Pointer to a heap array of length num_blocks containing the number of markers that make up each block...
unsigned int ** markers_in_block
Pointer to a heap array of length num_blocks, each entry in which is a pointer to a heap array with l...
A type that stores the information needed to calculate breeding values from alleles at markers.
unsigned int * cumn_alleles
A vector of length n_markers holding the cumulative number of alleles that have effects on breeding v...
double * eff
A vector holding the effect on breeding value of each allele at each marker.
unsigned int n_markers
Number of markers across which genotypes are tracked.
double * centre
Vector of length n_markers, containing a value for each marker which represents the value to subtract...
char * allele
A vector holding the symbol/character representing each allele at each marker.
Simple crate that stores a GroupNum, a MapID, and an EffectID.
gsc_EffectID effSet
gsc_GroupNum group
gsc_GenoLocation loc
Location in the simulation where this parent is stored.
unsigned int mapindex
Index in d->genome.maps of the recombination map to use when producing gametes from this parent.
A type representing a program-lifetime-unique identifier for a genotype, to be used in tracking pedig...
unsigned int id
A structure to search and cache indexes of all genotypes in a gsc_SimData or of all the members of a ...
unsigned int cacheSize
Length in gsc_GenoLocations of cache
const gsc_GroupNum group
Group through which to iterate.
unsigned int largestCached
Local/group index (that is, index in cache) of the highest cell in cache that has been filled.
gsc_SimData * d
Simulation data through which to iterate.
unsigned int groupSize
If the number of genotypes in the simulation that fulfil the iterator's group criteria is known,...
gsc_GenoLocation * cache
Array iteratively updated with the known genotypes in the simulation that fulfil the group criteria o...
A type that stores linkage groups and crossover probabilities for simulating meiosis.
size_t n_chr
The number of chromosomes/linkage groups represented in the map.
gsc_LinkageGroup * chrs
Vector of n_chr recombination maps, one for each chromosome/linkage group in this recombination map.
char ** chr_names
An identifying code for each chromosome/linkage group in the map.
unsigned int n_markers
The number of markers in this chromosome/linkage group.
double expected_n_crossovers
Expected value of the Poisson distribution from which the number of crossovers in this linkage group ...
unsigned int * marker_indexes
Array with n_markers entries.
double * dists
Array with n_markers entries, containing at position i the distance in centimorgans along the linkage...
Composite type that is used to run crossing simulations.
unsigned int n_labels
The number of custom labels in the simulation.
gsc_KnownGenome genome
A gsc_KnownGenome, which stores the information of known markers and linkage groups,...
gsc_LabelID * label_ids
The identifier number of each label in the simulation, in order of their lookup index.
gsc_EffectID * eff_set_ids
The identifier number of each set of allele effects in the simulation, ordered by their lookup index.
int * label_defaults
Array containing the default (birth) value of each custom label.
unsigned int n_groups
Number of groups currently existing in simulation.
unsigned int n_eff_sets
The number of sets of allele effects in the simulation.
gsc_PedigreeID current_id
Highest SimData-unique ID that has been generated so far.
rnd_pcg_t rng
Random number generator working memory.
gsc_AlleleMatrix * m
Pointer to a gsc_AlleleMatrix, which stores data and metadata of founders and simulated offspring.
gsc_MarkerEffects * e
Array of n_eff_sets gsc_MarkerEffects, optional for the use of the simulation.
unsigned int first_marker_index
The index of the first marker in this chromosome/linkage group in the simulation's corresponding gsc_...
unsigned int n_markers
The number of markers in this chromosome/linkage group.
double * dists
Array with n_markers entries, containing at position i the distance in centimorgans along the linkage...
double expected_n_crossovers
Expected value of the Poisson distribution from which the number of crossovers in this linkage group ...
Represent a cell read by a gsc_TableFileReader.
int predCol
since last read, how many column gaps have there been?
char * cell
deep copy of the cell contents, or NULL
_Bool isCellShallow
is the string in 'cell' a shallow copy or deep copy?
int predNewline
since last read, how many newlines have there been?
_Bool eof
are we (this cell) at end of file
size_t cell_len
length of cell contents (because a shallow copy may not be null-terminated)
Stream reader for files of some tabular format.
int buf_fill
Number of characters from the file that are currently loaded in buf.
char buf[8192]
A window of characters from the file, loaded into memory for current processing.
int cursor
Index in buf of the first character that the file reader has not yet parsed.
FILE * fp
File being read.
struct gsc_datastore_make_genotypes::@5 clones
struct gsc_datastore_make_genotypes::@3 selfing
struct gsc_datastore_make_genotypes::@1 rand_btwn
struct gsc_datastore_make_genotypes::@0 rand
struct gsc_datastore_make_genotypes::@2 targeted
struct gsc_datastore_make_genotypes::@4 doub_haps