12 .offspring_name_prefix = NULL,
16 .filename_prefix = NULL,
36 fprintf(stderr,
"0 memory allocation requested.\n");
42 fprintf(stderr,
"Memory allocation failed. Exiting.\n"); exit(2);
44 fprintf(stderr,
"Memory allocation failed.\n");
64 const int* labelDefaults,
74 memset(m->
alleles[i], 0,
sizeof(
char) * (n_markers<<1));
81 for (
GSC_ID_T i = 0; i < n_labels; ++i) {
84 m->
labels[i][j] = labelDefaults[i];
87 }
else if (n_labels == 0) {
90 fprintf(stderr,
"Invalid negative number of labels provided to gsc_create_empty_allelematrix");
126 rnd_pcg_seed( &d->
rng, RNGseed );
143 if (d == NULL)
return;
209 double target = exp(-lambda);
210 double p = rnd_pcg_nextf(rng);
213 p *= rnd_pcg_nextf(rng);
247 if ((fp = fopen(filename,
"r")) == NULL) {
248 fprintf(stderr,
"Failed to open file %s.\n", filename); exit(1);
252 while (c != EOF && c !=
'\n') {
273 if (has_length && sep_count != details.
num_columns-1) {
277 fprintf(stderr,
"Bad columns on row %d\n", details.
num_rows + 1); exit(1);
282 }
else if (c == sep) {
401 while (list[index].
id != target.
id && first <= last) {
405 if (index+lookahead <= last && list[index+lookahead].
id !=
GSC_NO_PEDIGREE.id) {
406 if (list[index+lookahead].
id == target.
id) {
407 return index+lookahead;
408 }
else if (list[index+lookahead].
id < target.
id) {
409 first = index+lookahead + 1;
415 }
else if (index-lookahead <= last && list[index-lookahead].
id !=
GSC_NO_PEDIGREE.id) {
416 if (list[index-lookahead].
id == target.
id) {
417 return index-lookahead;
418 }
else if (list[index-lookahead].
id < target.
id) {
422 last = index-lookahead - 1;
427 if (index+lookahead <= last || index-lookahead >= first) {
434 if (list[index].
id == target.
id) {
436 }
else if (list[index].
id < target.
id) {
444 index = (first + last) / 2;
470 const size_t listLen,
472 for (
size_t i = 0; i < listLen; ++i) {
473 if (strcmp(list[i], target) == 0) {
497 const size_t listLen,
499 size_t first = 0, last = listLen - 1;
500 size_t index = (first + last) / 2;
501 int comparison = strcmp(target,list[index]);
502 while (comparison != 0 && first <= last) {
503 if (comparison == 0) {
505 }
else if (comparison < 0) {
512 index = (first + last) / 2;
513 comparison = strcmp(target, list[index]);
543 const size_t item_size,
544 const size_t total_n,
545 const size_t n_to_shuffle) {
546 if (n_to_shuffle > 1) {
549 void* tmp = &tmp_spot;
550 if (item_size >
sizeof(tmp_spot)) {
554 size_t maxi = total_n > n_to_shuffle ? n_to_shuffle - 1 : total_n - 1;
556 for (i = 0; i <= maxi; ++i) {
558 size_t j = i + rnd_pcg_range(rng,0,total_n - i - 1);
561 memcpy(&tmp, sequence + j*item_size, item_size);
562 memcpy(sequence + j*item_size, sequence + i*item_size, item_size);
563 memcpy(sequence + i*item_size, &tmp, item_size);
566 if (item_size >
sizeof(tmp_spot)) {
592 if (prefix == NULL) {
598 sprintf(format,
"%s%s", prefix, sname);
600 int livingsuffix = suffix;
604 if (a->
names[i] != NULL) {
609 sprintf(sname, format, livingsuffix);
611 strcpy(a->
names[i], sname);
662 memset(new_label_defaults, 0,
sizeof(
int) * d->
n_labels);
664 new_label_defaults[d->
n_labels] = setTo;
668 fprintf(stderr,
"Labels malformed; gsc_SimData may be corrupted\n");
693 int** oldLabelList = m->
labels;
696 m->
labels[i] = oldLabelList[i];
706 m->
labels[newLabel][i] = setTo;
725 }
else if (!warned) {
726 fprintf(stderr,
"Unable to create new label for all genotypes; gsc_SimData may be corrupted\n");
748 const int newDefault) {
751 fprintf(stderr,
"Nonexistent label %lu\n", (
long unsigned int) whichLabel.
id);
778 fprintf(stderr,
"Nonexistent label %lu\n", (
long unsigned int) whichLabel.
id);
790 m->
labels[labelIndex][i] = setTo;
804 m->
labels[labelIndex][i] = setTo;
835 fprintf(stderr,
"Nonexistent label %lu\n", (
long unsigned int) whichLabel.
id);
847 m->
labels[labelIndex][i] += byValue;
857 m->
labels[labelIndex][i] += byValue;
892 const size_t n_values,
896 fprintf(stderr,
"Nonexistent label %lu\n", (
long unsigned int) whichLabel.
id);
909 if (currentIndex >= startIndex) {
910 m->
labels[labelIndex][i] = values[currentIndex - startIndex];
913 if (currentIndex > startIndex && currentIndex - startIndex >= n_values) {
926 if (currentIndex >= startIndex) {
927 m->
labels[labelIndex][i] = values[currentIndex - startIndex];
930 if (currentIndex > startIndex && currentIndex - startIndex >= n_values) {
969 const size_t n_values,
970 const char** values) {
982 if (currentIndex >= startIndex) {
984 if (m->
names[i] != NULL) {
991 strcpy(m->
names[i], values[whichName]);
994 if (currentIndex > n_values) {
1008 if (currentIndex >= startIndex) {
1010 if (m->
names[i] != NULL) {
1016 const int nameLen = strlen(values[whichName]);
1018 strncpy(m->
names[i], values[whichName], nameLen);
1021 if (currentIndex > n_values) {
1054 const char* which_marker,
1059 unsigned int nalleles = 0;
1062 if (which_marker == NULL) {
1118 printf(
"Changed allele %c to %c %lu times across %lu markers and %lu genotypes\n",
1119 from, to, (
long unsigned int)nalleles, (
long unsigned int)nmarkers, (
long unsigned int)ngenos);
1146 const double* values) {
1149 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effset.
id);
1154 fprintf(stderr,
"Cannot use these values as marker effect centres because the number of values is not equal to the number of markers in the effect set\n");
1158 if (d->
e[effIndex].
centre == NULL) {
1189 const char** marker_names,
1190 const double* centres) {
1193 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effset.
id);
1211 fprintf(stderr,
"Could not find marker named %s in the list of tracked markers\n", marker_names[ix]);
1214 e->
centre[markerix] = centres[ix];
1259 const char** marker_names,
1260 const double* centres,
1262 const _Bool reset_centres) {
1265 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effset.
id);
1275 }
else if (reset_centres) {
1288 a < e->cumn_alleles[markerix]; ++a) {
1289 if (e->
allele[a] == allele) {
1293 double mcentre = centres[ix] * e->
eff[a];
1294 e->
centre[markerix] = reset_centres ? mcentre : mcentre + e->
centre[markerix];
1299 fprintf(stderr,
"Could not find effect value for allele %c at marker %s\n", allele, marker_names[ix]);
1304 fprintf(stderr,
"Could not find marker named %s in the list of tracked markers\n", marker_names[ix]);
1317 int digits = 0, ii = i;
1333 double d0 = **(
double **)pp0;
1334 double d1 = **(
double **)pp1;
1349 double d0 = *(
double *)pp0;
1350 double d1 = *(
double *)pp1;
1366 double d0 = **(
double **)pp0;
1367 double d1 = **(
double **)pp1;
1379 char* str1 = **(
char***)p0;
1380 char* str2 = **(
char***)p1;
1381 return strcmp(str1,str2);
1391 return strcmp(s0.
chr, s1.
chr);
1429 int* label_defaults) {
1434 fprintf(stderr,
"In moving a genotype from %p:%lu to %p:%lu, the genotype at %p:%lu will be overwritten\n",
1457 fprintf(stderr,
"Origin and destination when copying genotype do not have the same number of custom"
1458 " labels (n_labels). The genotype now at %p:%lu will have lost its label data\n",
1461 fprintf(stderr,
"Label defaults must be supplied to gsc_move_genotypes or there is risk of "
1462 "corrupted label values in further use of the simulation");
1600 if (previous != NULL) {
1601 previous->next = NULL;
1710 if (firstAM == NULL) {
1723 if (firstAM->
next == NULL) {
1727 firstAM = firstAM->
next;
1746 firstAM = firstAM->
next;
1747 if (firstAM == NULL) {
1776 .cacheSize = cacheSize,
1791 unsigned int currentIndex = 0;
1793 if (am == NULL)
return NULL;
1794 while (currentIndex < n) {
1795 if (am->
next == NULL) {
1818 unsigned int firstAMIndex = 0;
1824 if (firstAM == NULL) {
1829 if (firstAM->
next == NULL) {
1833 firstAM = firstAM->
next;
1857 firstAM = firstAM->
next;
1859 if (firstAM == NULL) {
1898 unsigned int lastAMIndex = 0;
1905 if (lastAM == NULL) {
1910 lastAM = lastAM->
next;
1923 lastAM = lastAM->
next;
2014 if (nextAM != NULL) {
2016 nextAM = nextAM->
next;
2018 }
while (nextAM != NULL && nextAM->
n_genotypes == 0);
2021 if (nextAM == NULL) {
2055 if (nextAM != NULL) {
2057 nextAM = nextAM->
next;
2059 }
while (nextAM != NULL && nextAM->
n_genotypes == 0);
2062 if (nextAM == NULL) {
2136 if (nextAM != NULL) {
2140 }
while (nextAM != NULL && nextAM->
n_genotypes == 0);
2143 if (nextAM == NULL) {
2182 if (nextAM != NULL) {
2186 }
while (nextAM != NULL && nextAM->
n_genotypes == 0);
2189 if (nextAM == NULL) {
2248 if (n < it->cacheSize) {
2263 if (expectedLocation.
localAM == NULL ||
2267 return expectedLocation;
2286 for (; localPos < currentAM->
n_genotypes; ++localPos) {
2297 newCacheSize = newCacheSize << 1;
2307 it->
cache = newCache;
2314 .localPos = localPos
2317 return it->
cache[n];
2327 currentAM = currentAM->
next;
2358 fprintf(stderr,
"Invalid ID %lu\n", (
long unsigned int)
id.
id);
2361 if (start == NULL) {
2362 fprintf(stderr,
"Invalid nonexistent allelematrix\n"); exit(1);
2377 if (m->
next == NULL) {
2378 fprintf(stderr,
"Could not find the ID %lu: did you prematurely delete this genotype?\n", (
long unsigned int)
id.
id);
2386 return m->
names[index];
2390 if (m->
next == NULL) {
2391 fprintf(stderr,
"Could not find the ID %lu: did you prematurely delete this genotype?\n", (
long unsigned int)
id.
id);
2428 if (start == NULL) {
2429 fprintf(stderr,
"Invalid nonexistent allelematrix\n"); exit(1);
2459 if (m->
next == NULL) {
2460 fprintf(stderr,
"Unable to locate ID %lu in simulation memory (genotype has likely been deleted): pedigree past this point cannot be determined\n", (
long unsigned int)
id.
id);
2484 const size_t n_names,
2487 if (start == NULL || (start->
n_genotypes <= 0 && start->
next == NULL)) {
2488 fprintf(stderr,
"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2492 fprintf(stderr,
"Invalid n_names parameter: Search list length must be positive\n");
2499 for (
size_t i = 0; i < n_names; ++i) {
2506 if (strcmp(m->
names[j], names[i]) == 0) {
2508 output[i] = m->
ids[j];
2516 if ((m = m->
next) == NULL) {
2517 fprintf(stderr,
"Didn't find the name %s\n", names[i]);
2540 if (start == NULL || (start->
n_genotypes <= 0 && start->
next == NULL)) {
2541 fprintf(stderr,
"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2556 if ((m = m->
next) == NULL) {
2557 fprintf(stderr,
"Didn't find the child of %lu & %lu\n",
2558 (
long unsigned int)parent1id.
id, (
long unsigned int)parent2id.
id);
2581 if (start == NULL || (start->
n_genotypes <= 0 && start->
next == NULL)) {
2582 fprintf(stderr,
"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2591 if (m->
names[j] != NULL && strcmp(m->
names[j], name) == 0) {
2596 if ((m = m->
next) == NULL) {
2597 fprintf(stderr,
"Didn't find the name %s\n", name);
2616 if (start == NULL) {
2617 fprintf(stderr,
"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2624 if (total_j == index) {
2626 }
else if (total_j < index && total_j + m->n_genotypes > index) {
2627 return m->
ids[index - total_j];
2631 if ((m = m->
next) == NULL) {
2632 fprintf(stderr,
"Didn't find the index %lu\n", (
long unsigned int) index);
2655 if (start == NULL) {
2656 fprintf(stderr,
"Invalid nonexistent allelematrix\n");
2663 if (total_j == index) {
2665 }
else if (total_j < index && total_j + m->n_genotypes > index) {
2666 return m->
alleles[index - total_j];
2670 if ((m = m->
next) == NULL) {
2671 fprintf(stderr,
"Didn't find the index %lu\n", (
long unsigned int) index);
2695 const size_t list_len,
2702 for (; i < list_len; ++i) {
2708 outGroup = candidate;
2713 int remaininglistlen = list_len - i;
2714 if (remaininglistlen < 2) {
2716 }
else if (remaininglistlen == 2) {
2717 if (grouplist[i].num == grouplist[i+1].num) {
2737 memset(isDuplicate, 0,
sizeof(_Bool)*remaininglistlen);
2738 for (
size_t ii = i; ii < list_len; ++ii) {
2739 for (
size_t jj = ii+1; jj < list_len; ++jj) {
2740 if (grouplist[ii].num == grouplist[jj].num) {
2741 isDuplicate[jj-i] = 1;
2747 memset(anyFound, 0,
sizeof(_Bool)*remaininglistlen);
2757 for (
size_t k = i+1; k < list_len; ++k) {
2760 cachedgroup = grouplist[k];
2770 size_t groupsgone = 0;
2771 for (
size_t j = 0; j < remaininglistlen; ++j) {
2772 if (!isDuplicate[j] && anyFound[j]) {
2801 const size_t index_list_len,
2803 if (index_list_len < 1) {
2804 fprintf(stderr,
"Invalid index_list_len value: length of allocation list must be at least 1\n");
2810 size_t invalidLocations = 0;
2811 for (
size_t i = 0; i < index_list_len; ++i) {
2820 if (invalidLocations > 0) {
2821 fprintf(stderr,
"%lu indexes were invalid\n",(
long unsigned int)invalidLocations);
2825 if (invalidLocations < index_list_len) {
2857 const int valueToSplit) {
2860 fprintf(stderr,
"Nonexistent label %lu\n", (
long unsigned int)whichLabel.
id);
2913 const int valueLowBound,
2914 const int valueHighBound) {
2917 fprintf(stderr,
"Nonexistent label %lu\n", (
long unsigned int)whichLabel.
id);
2920 if (valueLowBound > valueHighBound) {
2921 fprintf(stderr,
"Empty range %d to %d: no group created\n", valueLowBound, valueHighBound);
2972 void* somequality_data,
2978 size_t maxentries_results,
2985 size_t bookmark = 0;
2991 size_t splitgroupsize = 0;
2992 for (
size_t i = 0; i < n_groups; ++i) {
2993 if (currentgroups[i].num == group_id.
num) {
2994 splitgroupsize = currentsizes[i];
2999 if (splitgroupsize == 0) {
3004 size_t subgroupsfound = 0;
3011 gsc_GroupNum assignedgroup = somequality_tester(loc, somequality_data,
3012 splitgroupsize, subgroupsfound, outgroups);
3016 assignedgroup = nextgroup;
3017 outgroups[subgroupsfound] = nextgroup;
3029 if (maxentries_results < subgroupsfound) {
3030 memcpy(results,outgroups,
sizeof(
gsc_GroupNum)*maxentries_results);
3031 fprintf(stderr,
"Output vector size is not large enough to hold all created groups: "
3032 " output list of gsc_GroupNums has been truncated\n");
3034 memcpy(results,outgroups,
sizeof(
gsc_GroupNum)*subgroupsfound);
3037 return subgroupsfound;
3048 for (
size_t j = 0; j < groupsfound; ++j) {
3049 if (getparent(loc).id == familyidentities[j].
id) {
3054 if (groupsfound > maxgroups) {
3055 fprintf(stderr,
"Attempted to split into more groups than caller deemed possible. "
3056 "There is a bug in the simulation tool if you can reach this state.");
3057 return results[maxgroups-1];
3060 familyidentities[groupsfound] = getparent(loc);
3118 size_t maxentries_results,
3120 if (!(parent == 1 || parent == 2)) {
3121 fprintf(stderr,
"Value error: `parent` must be 1 or 2.");
3134 maxentries_results, results);
3138 maxentries_results, results);
3152 for (
size_t j = 0; j < groupsfound; ++j) {
3159 if (groupsfound > maxgroups) {
3160 fprintf(stderr,
"Attempted to split into more groups than caller deemed possible. "
3161 "There is a bug in the simulation tool if you can reach this state.");
3162 return results[maxgroups-1];
3199 size_t maxentries_results,
3203 if (maxgroups < 2) {
3209 familyidentities[0] = p1identity;
3210 familyidentities[1] = p2identity;
3214 maxentries_results, results);
3256 size_t maxentries_results,
3262 maxentries_results, results);
3294 fprintf(stderr,
"Group %lu does not exist\n", (
long unsigned int) group_id.
num);
3296 fprintf(stderr,
"Group %lu has only one member so can't be split\n", (
long unsigned int) group_id.
num);
3345 void* someallocator_data,
3358 size_t subgroupsfound = 0;
3364 gsc_GroupNum assignedgroup = someallocator(loc, d, someallocator_data,
3365 n_outgroups, &subgroupsfound, outgroups);
3369 allocationfailures++;
3375 if (subgroupsfound > 1) {
3378 if (allocationfailures > 0) {
3379 fprintf(stderr,
"While splitting group %lu, %lu allocations to new groups failed so they remain"
3380 " in the original group\n",
3381 (
long unsigned int) group_id.
num, (
long unsigned int) allocationfailures);
3383 return subgroupsfound;
3392 size_t* subgroupsfound,
3395 *subgroupsfound = n_outgroups;
3397 int randpos = rnd_pcg_range(&d->
rng,0,cumulative_counts[n_outgroups-1] - 1);
3401 for (; j < n_outgroups; ++j) {
3402 if (randpos < cumulative_counts[j]) {
3403 chosengroup = outgroups[j];
3407 for (; j < n_outgroups; ++j) {
3408 cumulative_counts[j]--;
3439 fprintf(stderr,
"Invalid n value: number of fractions into which to split group must be at least 2\n");
3449 for (
size_t i = 0; i < n; ++i) {
3450 boxes[i] = each_size;
3455 boxes[i] += boxes[i-1];
3460 if (results == NULL) {
3517 fprintf(stderr,
"Invalid n value: number of fractions into which to split group must be at least 2\n");
3524 for (
size_t j = 0; j < n - 1; ++j) {
3526 cumulative_counts[j] = sum;
3528 if (cumulative_counts[n-2] > cumulative_counts[n-1]) {
3529 fprintf(stderr,
"Provided capacities are larger than actual group: some buckets will not be filled\n");
3533 if (results == NULL) {
3579 if (rnd_pcg_range(&d->
rng,0,1)) {
3599 size_t* subgroupsfound,
3602 size_t randgroup = rnd_pcg_range(&d->
rng,0,n_outgroups-1);
3603 if (randgroup < *subgroupsfound) {
3604 return outgroups[randgroup];
3606 (*subgroupsfound)++;
3607 return outgroups[*subgroupsfound-1];
3640 fprintf(stderr,
"Invalid n value: number of fractions in which to split group must be at least 2\n");
3645 if (results == NULL) {
3664 size_t* subgroupsfound,
3666 double* cumulative_probs = (
double*) datastore;
3667 *subgroupsfound = n_outgroups;
3668 double randdraw = rnd_pcg_nextf(&d->
rng);
3669 for (
size_t j = 0; j < n_outgroups; ++j) {
3670 if (randdraw < cumulative_probs[j]) {
3671 return outgroups[j];
3713 const double* probs,
3716 fprintf(stderr,
"Invalid n value: number of fractions in which to split group must be at least 2\n");
3722 cumulative_probs[n-1] = 1.0;
3724 for (
size_t j = 0; j < n-1; ++j) {
3726 cumulative_probs[j] = sum;
3727 if (cumulative_probs[j] >= 1) {
3728 fprintf(stderr,
"Provided probabilities add up to 1 or more: some buckets will not be filled\n");
3729 for (; j < n-1; ++j) {
3730 cumulative_probs[j] = 1;
3738 if (results == NULL) {
3799 size_t filledbuckets = 0;
3806 if (g.
num >= bucketscap) {
3807 size_t oldcap = bucketscap;
3808 size_t newbucketcapacity = bucketscap;
3809 while (g.
num >= newbucketcapacity) {
3810 newbucketcapacity *= 2;
3813 if (g.
num >= bucketscap) {
3814 fprintf(stderr,
"Memory allocation failed. Not all groups found\n");
3817 memset(buckets+oldcap,0,
sizeof(
GSC_GLOBALX_T)*(bucketscap-oldcap));
3821 buckets[g.
num] += 1;
3822 if (buckets[g.
num] == 1) {
3830 size_t capacity = filledbuckets;
3832 fprintf(stderr,
"Found more groups than expected - gsc_SimData.n_groups is outdated somewhere."
3833 " Trimming output of get_existing_group_ to avoid a crash: not all groups may be shown\n");
3837 for (
size_t i = 1; i < bucketscap; ++i) {
3845 if (out_groups != NULL) {
3848 if (out_sizes != NULL) {
3849 out_sizes[g_index] = buckets[i];
3900 nextgroup.
num = existing_groups[(*cursor) - 1].
num + 1;
3903 while (*
cursor < n_existing_groups) {
3940 while (i < n_groups) {
3941 if (gn < existing_groups[i].num) {
3969 size_t existingi = 0;
3976 for (
size_t i = 0; i < n; ++i) {
3978 while (existingi < n_groups) {
3979 if (gn < existing_groups[existingi].num) {
4003 while (i < d->n_labels) {
4004 if (
new.id < d->label_ids[i].
id) {
4027 while (i < d->n_eff_sets) {
4028 if (
new.id < d->eff_set_ids[i].
id) {
4051 while (i < d->genome.n_maps) {
4052 if (
new.id < d->genome.map_ids[i].id) {
4082 while (first <= last) {
4083 mid = (first + last) / 2;
4115 while (first <= last) {
4116 mid = (first + last) / 2;
4139 if (d->genome.n_maps == 0) {
return GSC_NA_IDX; }
4140 if (d->genome.n_maps == 1) {
return (d->genome.map_ids[0].id == map.
id) ? 0 :
GSC_NA_IDX ; }
4148 while (first <= last) {
4149 mid = (first + last) / 2;
4151 if (d->genome.map_ids[mid].id == map.
id) {
4153 }
else if (d->genome.map_ids[mid].id < map.
id) {
4224 output[outix] = m->
alleles[i];
4226 if (outix == group_size) {
4262 output[outix] = m->
names[i];
4264 if (outix == group_size) {
4300 output[outix] = m->
ids[i];
4302 if (outix == group_size) {
4338 output[outix] = total_i;
4340 if (outix == group_size) {
4376 group_size = dm_bvs.
dim2;
4379 memcpy(output, dm_bvs.
matrix[0],
sizeof(*output)*group_size);
4407 const int whichParent,
4409 if (!(whichParent == 1 || whichParent == 2)) {
4410 fprintf(stderr,
"Value error: `parent` must be 1 or 2.");
4413 int parent = whichParent - 1;
4420 output[outix] = m->
pedigrees[parent][i];
4422 if (outix == group_size) {
4455 const int whichParent,
4457 if (!(whichParent == 1 || whichParent == 2)) {
4458 fprintf(stderr,
"Value error: `parent` must be 1 or 2.");
4461 int parent = whichParent - 1;
4471 output[outix] = NULL;
4474 if (outix == group_size) {
4518 char* fname =
"gS_gpptmp";
4522 if ((fp2 = fopen(fname,
"r")) == NULL) {
4523 fprintf(stderr,
"Failed to use temporary file\n");
4530 if (group_size == 0) {
return 0; }
4554 while ((nextc = fgetc(fp2)) !=
'\n' && nextc != EOF) {
4555 output[i][index] = nextc;
4558 if (index >= size) {
4560 char* temp = realloc(output[i],
sizeof(
char) * size);
4563 fprintf(stderr,
"Memory allocation of size %u failed.\n", size);
4570 output[i][index] =
'\0';
4596 for (
size_t i = 0; i < r; ++i) {
4598 for (
size_t j = 0; j < c; ++j) {
4599 zeros.
matrix[i][j] = 0.0;
4603 for (
size_t i = 0; i < r; ++i) {
4620 for (
size_t i = 0; i < m->
dim1; i++) {
4621 if (m->
matrix[i] != NULL) {
4656 if (m->
names[i] != NULL) {
4672 total_deleted += deleted;
4676 printf(
"%lu genotypes were deleted\n", (
long unsigned int) total_deleted);
4677 if (total_deleted > 0) {
4693 fprintf(stderr,
"Nonexistent effect set %lu\n", (
long unsigned int) effID.
id);
4712 d->
e[i] = d->
e[i+1];
4716 memcpy(newE, d->
e,
sizeof(*d->
e)*which_ix);
4717 memcpy(newE + which_ix, d->
e + which_ix + 1,
sizeof(*d->
e)*(d->
n_eff_sets - which_ix));
4723 if (newIDs == NULL) {
4748 fprintf(stderr,
"Nonexistent label %lu\n", (
long unsigned int)which_label.
id);
4773 if (new_label_ids == NULL) {
4786 if (new_label_defaults == NULL) {
4806 if (new_label_lookups == NULL) {
4812 memcpy(new_label_lookups, m->
labels,
sizeof(*m->
labels)*label_ix);
4813 memcpy(new_label_lookups + label_ix, m->
labels + label_ix + 1,
sizeof(*m->
labels)*(m->
n_labels - label_ix));
4815 m->
labels = new_label_lookups;
4850 if (g->
maps != NULL) {
4870 fprintf(stderr,
"Nonexistent recombination map %lu\n", (
long unsigned int) which_map.
id);
4885 if (tmplist == NULL) {
4899 if (tmpids == NULL) {
4923 if (m->
chrs != NULL) {
4926 case GSC_LINKAGEGROUP_SIMPLE:
4935 case GSC_LINKAGEGROUP_REORDER:
4986 if (m->
names[i] != NULL) {
4993 if (m->
labels[i] != NULL) {
5025 if (m->
eff != NULL) {
5123 if ((fp = fopen(filename,
"r")) == NULL) {
5124 fprintf(stderr,
"Failed to open file %s.\n", filename);
5142 if (tbl->
fp != NULL) { fclose(tbl->
fp); }
5155 if (tbl->
fp != NULL) {
5213 .predCol = 0, .predNewline = 0, .eof =
GSC_FALSE };
5217 size_t tblbuf_offset = 0;
5218 size_t tblbuf_len = 0;
5219 int predCarriageReturn = 0;
5224 if (0 < predCarriageReturn) { --predCarriageReturn; }
5230 predCarriageReturn = 2;
5232 if (!(predCarriageReturn && tbl->
buf[tbl->
cursor] ==
'\n')) {
5247 if (0 < predCarriageReturn) { ++predCarriageReturn; }
5251 tblbuf_offset = tbl->
cursor; tblbuf_len = 1;
5274 if (!warned && tblbuf_len > 8192) {
5276 fprintf(stderr,
"Warning: very long cell identified beginning %c%c%c%c%c%c. Column separators may have failed to be recognised\n",
5277 tmpcell[0],tmpcell[1],tmpcell[2],tmpcell[3],tmpcell[4],tmpcell[5]);
5281 memcpy(tmpcell+tmpix,tbl->
buf+tblbuf_offset,
sizeof(
char)*tblbuf_len);
5282 tmpix += tblbuf_len;
5283 tmpcell[tmpix] =
'\0';
5285 tblbuf_offset = 0; tblbuf_len = 0;
5296 memcpy(cur.
cell,tmpcell,
sizeof(
char)*tmpix);
5297 if (0 < tblbuf_len) {
5298 memcpy(cur.
cell+tmpix,tbl->
buf+tblbuf_offset,
sizeof(
char)*tblbuf_len);
5324 while (comparison != 0 && first <= last) {
5325 if (comparison == 0) {
5328 }
else if (comparison > 0) {
5332 if (index == 0) {
return 0; }
5337 index = (first + last) / 2;
5359 size_t* queuesize) {
5361 if (*queuesize > 0) {
5396 size_t* queuesize) {
5397 for (
int i = 0; i < max_headerlen + 1; ++i) {
5399 if (*queuesize <= i) {
5404 int headerlength = -1;
5405 if (outputq[i].predNewline) {
5407 }
else if (outputq[i].eof) {
5411 if (headerlength > 0) {
5412 if (headerlength >= min_headerlen && headerlength <= max_headerlen) {
5413 return headerlength;
5456 const char** titles_required,
5458 const char** titles_optional,
5462 int ncell_total = ncell_required + ncell_optional;
5463 for (
int i = 0; i < ncell_total; ++i) {
5469 for (; title_ix < ncell_required; ++title_ix) {
5474 _Bool found_match = 0;
5475 size_t title_len = strlen(titles_required[title_ix]);
5477 for (
int header_ix = title_ix; header_ix < ncellrow1; ++header_ix) {
5478 int header_queueix = col_order[header_ix];
5480 if (unprocessedqueue[header_queueix].cell_len == title_len &&
5481 strncmp(unprocessedqueue[header_queueix].cell,titles_required[title_ix],title_len) == 0) {
5482 if (header_ix != title_ix) {
5483 col_order[header_ix] = col_order[title_ix];
5484 col_order[title_ix] = header_queueix;
5492 int matches = ncell_required;
5493 for (; title_ix < ncell_total; ++title_ix) {
5498 _Bool found_match = 0;
5499 int title_ix_o = title_ix - ncell_required;
5500 size_t title_len = strlen(titles_optional[title_ix_o]);
5502 for (
int header_ix = matches; header_ix < ncellrow1; ++header_ix) {
5503 int header_queueix = col_order[header_ix];
5505 if (unprocessedqueue[header_queueix].cell_len == title_len &&
5506 strncmp(unprocessedqueue[header_queueix].cell,titles_optional[title_ix_o],title_len) == 0) {
5507 if (header_ix != title_ix) {
5508 col_order[header_ix] = col_order[title_ix];
5509 col_order[title_ix] = header_queueix;
5516 if (!found_match) { col_order[title_ix] = -1; }
5556 if (filename == NULL)
return 0;
5565 size_t queue_size = 0;
5568 const char* titles[3] = {
"marker",
"chr",
"pos"};
5570 int marker_colnum, chr_colnum, pos_colnum;
5573 printf(
"(Loading %s) Format: map file with header\n", filename);
5574 marker_colnum = colnums[0] + 1, chr_colnum = colnums[1] + 1, pos_colnum = colnums[2] + 1;
5576 printf(
"(Loading %s) Format: map file without header\n", filename);
5577 marker_colnum = 1, chr_colnum = 2, pos_colnum = 3;
5579 printf(
"(Loading %s) Failure: Cannot identify the expected 3 columns of the map file\n", filename);
5580 for (
int i = 0; i < queue_size; ++i) {
5581 if (!cellqueue[i].isCellShallow) {
GSC_FREE(cellqueue[i].cell); }
5590 if (!cellsread[0].isCellShallow) {
GSC_FREE(cellsread[0].cell); }
5591 if (!cellsread[1].isCellShallow) {
GSC_FREE(cellsread[1].cell); }
5592 if (!cellsread[2].isCellShallow) {
GSC_FREE(cellsread[2].cell); }
5594 _Bool goodrow = (header) ? 0 : 1;
5595 size_t goodrow_counter = 0;
5597 char* marker = NULL;
5600 char* conversionflag;
5609 if (ncell.
cell != NULL) {
5612 buffer[goodrow_counter].name = marker;
5613 buffer[goodrow_counter].chr = chr;
5614 buffer[goodrow_counter].pos = pos;
5617 if (goodrow_counter >= buffercap) {
5623 if (marker != NULL) {
5634 col += (ncell.
predCol > 0) ? 1 : 0;
5639 }
if (col == marker_colnum) {
5641 marker = ncell.
cell; ncell.
cell = NULL;
5644 }
else if (col == chr_colnum) {
5646 chr = ncell.
cell; ncell.
cell = NULL;
5656 }
else if (col == pos_colnum) {
5658 pos = strtod(ncell.
cell,&conversionflag);
5672 }
while (!ncell.
eof);
5676 buffer[goodrow_counter].name = marker;
5677 buffer[goodrow_counter].chr = chr;
5678 buffer[goodrow_counter].pos = pos;
5683 if (marker != NULL) {
5693 printf(
"(Loading %s) %u marker(s) with map positions were loaded. Failed to parse %u line(s).\n", filename, (
unsigned int) goodrow_counter, (
unsigned int) (row - header - goodrow_counter));
5698 return goodrow_counter;
5728 if (rlist[i].
name != NULL) {
5731 if (n_joined != i) {
5732 rlist[n_joined] = rlist[i];
5788 memcpy(tmpMats,d->
e,
sizeof(*tmpMats)*neweffsetindex);
5798 d->
e[neweffsetindex] = effset;
5810 if (n_markers < 2) {
return; }
5818 char* current_chr = markerlist[0].
chr;
5821 if (strcmp(markerlist[i].
chr, current_chr) != 0) {
5823 qsort(markerlist + chr_start, i - chr_start,
5827 current_chr = markerlist[i].
chr;
5831 qsort(markerlist + chr_start, n_markers - chr_start,
5852 if (n_markers == 0)
return NO_MAP;
5855 memset(chr_nmembers,0,
sizeof(*chr_nmembers)*40);
5857 chr_nmembers[0] = 1;
5859 chr_ids[n_chr-1] = markerlist[0].
chr;
5860 markerlist[0].
chr = NULL;
5862 while (i < n_markers && markerlist[i].
name == NULL) {
5865 if (strcmp(chr_ids[n_chr-1], markerlist[i].
chr) != 0) {
5867 if (n_chr >= chr_nmemberscap) {
5870 memset(chr_nmembers+n_chr,0,
sizeof(*chr_nmembers)*n_chr);
5873 chr_ids[n_chr-1] = markerlist[i].
chr;
5874 markerlist[i].
chr = NULL;
5875 chr_nmembers[n_chr-1] = 1;
5877 ++(chr_nmembers[n_chr-1]);
5893 first_marker = current_marker;
5894 double chrdist = markerlist[first_marker + chr_nmembers[chr_ix] - 1].
pos - markerlist[first_marker].
pos;
5906 for (; current_marker < endpt; ++current_marker) {
5907 if (markerlist[current_marker].
name == NULL) {
5917 first_marker = current_marker;
5918 firsts_coord_in_genome = coord;
5919 lgdists[n_goodmembers] = (markerlist[current_marker].
pos - markerlist[first_marker].
pos) / chrdist;
5922 }
else if (firsts_coord_in_genome + n_goodmembers < d->genome.n_markers &&
5923 strcmp(markerlist[current_marker].
name, d->
genome.
marker_names[firsts_coord_in_genome + n_goodmembers]) == 0) {
5925 lgdists[n_goodmembers] = (markerlist[current_marker].
pos - markerlist[first_marker].
pos) / chrdist;
5930 for (
GSC_GENOLEN_T backfill = 0; backfill < n_goodmembers; ++backfill) {
5931 marker_coords[backfill] = firsts_coord_in_genome + backfill;
5937 for (; current_marker < endpt; ++current_marker) {
5938 if (markerlist[current_marker].
name == NULL) {
5946 marker_coords[n_goodmembers] = coord;
5947 lgdists[n_goodmembers] = (markerlist[current_marker].
pos - markerlist[first_marker].
pos) / chrdist;
5952 if (n_goodmembers == 0) {
5954 }
else if (marker_coords == NULL) {
5956 map.
chrs[chr_ix_actual].
type = GSC_LINKAGEGROUP_SIMPLE;
5963 map.
chrs[chr_ix_actual].
type = GSC_LINKAGEGROUP_REORDER;
5969 if (chrdist >= 5000*n_goodmembers) { ++n_sparse_chr; }
5973 if (map.
n_chr == 0) {
5977 if (n_sparse_chr > 0) {
5978 fprintf(stderr,
"%d of this map's chromosomes are very sparse (averaging less than 1 marker "
5979 "per 5 Morgans of distance). If the map is not expected to be this sparse, check that "
5980 "positions in the map file are in centimorgans, not base pairs.\n", n_sparse_chr);
6004 double expected_n_recombinations) {
6006 fprintf(stderr,
"Cannot create a recombination map if there is no genome\n");
6012 if (markernames == NULL) {
6018 map.
chrs[0].
type = GSC_LINKAGEGROUP_SIMPLE;
6024 if (n_markers == 0)
return NO_MAP;
6027 _Bool found_first = 0;
6034 if (!found_first || marker_coords != NULL) {
6038 if (markernames[i] == NULL) {
6042 }
else if (!found_first) {
6044 firsts_coord_in_genome = coord;
6047 marker_coords[chrmarker_ix] = coord;
6051 }
else if (firsts_coord_in_genome < d->genome.n_markers &&
6059 for (
GSC_GENOLEN_T backfill = 0; backfill < chrmarker_ix; ++backfill) {
6060 marker_coords[backfill] = firsts_coord_in_genome + backfill;
6063 if (markernames[i] == NULL) {
6074 double lgdist = 1./(chrmarker_ix-1);
6076 for (
GSC_GENOLEN_T i = 1; i < chrmarker_ix; ++i) { lgdists[i] = lgdists[i-1] + lgdist; }
6078 if (marker_coords == NULL) {
6079 map.
chrs[0].
type = GSC_LINKAGEGROUP_SIMPLE;
6085 map.
chrs[0].
type = GSC_LINKAGEGROUP_REORDER;
6092 if (could_not_match > 0) {
6093 fprintf(stderr,
"%d of the marker names do not appear in the genome\n", could_not_match);
6120 fprintf(stderr,
"Cannot create a recombination map if there is no genome\n");
6124 if (markernames == NULL) {
6132 map.
chrs[i].
type = GSC_LINKAGEGROUP_SIMPLE;
6150 if (markernames[m] == NULL) {
6158 if (could_not_match > 0) {
6159 fprintf(stderr,
"%d of the marker names do not appear in the genome\n", could_not_match);
6166 map.
chrs[i].
type = GSC_LINKAGEGROUP_SIMPLE;
6220 if (filename == NULL)
return NO_MAP;
6224 if (nrows == 0 || mapcontents == NULL) {
6225 if (mapcontents != NULL) {
6231 _Bool freeMapNames = 1;
6232 if (d->genome.n_markers > 0) {
6235 if (new_nrows < nrows) {
6236 printf(
"Discarded %lu markers when loading map %s because they do not appear in the primary map.\n", (
long unsigned int) (nrows - new_nrows), filename);
6252 d->genome.marker_names[i] = mapcontents[i].
name;
6253 d->genome.names_alphabetical[i] = &(d->genome.marker_names[i]);
6260 if (strcmp(*d->genome.names_alphabetical[i-1],*d->genome.names_alphabetical[i]) == 0) { ++n_dups; }
6263 fprintf(stderr,
"%d marker names were duplicates. It is recommended to remove duplicate names from the map file "
6264 "because data will only be loaded into one of the duplicates.\n", n_dups);
6273 for (
size_t i = 0; i < nrows; ++i) {
6277 for (
size_t i = 0; i < nrows; ++i) {
6278 if (mapcontents[i].
chr != NULL) {
GSC_FREE(mapcontents[i].
chr); }
6334 size_t queuesize = 0;
6337 const char* titles[4] = {
"marker",
"allele",
"eff",
"centre"};
6339 int marker_colnum, allele_colnum, eff_colnum, centre_colnum;
6342 printf(
"(Loading %s) Format: effect file with header\n", filename);
6343 marker_colnum = colnums[0] + 1, allele_colnum = colnums[1] + 1, eff_colnum = colnums[2] + 1;
6344 centre_colnum = colnums[3] + 1;
6346 printf(
"(Loading %s) Format: effect file without header\n", filename);
6347 marker_colnum = 1, allele_colnum = 2, eff_colnum = 3;
6349 centre_colnum = (row1len > 3) ? 4 : 0;
6351 printf(
"(Loading %s) Failure: Cannot identify the 3 required columns of the effect file\n", filename);
6356 if (centre_colnum > 0) {
6357 printf(
"(Loading %s) The file has %d columns. Identified optional column \"centre\"\n", filename, row1len);
6361 cellqueue += row1len;
6362 queuesize -= row1len;
6363 for (
int i = 0; i < row1len; ++i) {
6364 if (!cellsread[i].isCellShallow) {
GSC_FREE(cellsread[i].cell); }
6367 _Bool goodrow = (header) ? 0 : 1;
6372 char* conversionflag;
6378 if (ncell.
cell != NULL) {
6381 if (goodrow && col >= row1len) {
6383 if (n_effects >= raweffectscap) {
6392 col += (ncell.
predCol > 0) ? 1 : 0;
6396 }
else if (col == marker_colnum) {
6399 &(raweffects[n_effects].markerix));
6406 }
else if (col == allele_colnum) {
6411 raweffects[n_effects].allele = ncell.
cell[0];
6414 }
else if (col == eff_colnum) {
6416 raweffects[n_effects].eff = strtod(ncell.
cell,&conversionflag);
6423 }
else if (col == centre_colnum) {
6425 raweffects[n_effects].centre = strtod(ncell.
cell,&conversionflag);
6439 }
while (!ncell.
eof);
6441 if (goodrow && col >= row1len) {
6445 printf(
"(Loading %s) %lu effect value(s) were loaded. Failed to parse %lu line(s).\n",
6446 filename, (
long unsigned int) n_effects, (
long unsigned int) (row - header - n_effects));
6449 if (n_effects == 0) {
6463 if (centre_colnum) {
6473 if (centre_colnum) {
6476 if (raweffects[i].markerix != markerix_current) {
6477 for (
GSC_GENOLEN_T j = markerix_current; j < raweffects[i].markerix; ++j) {
6480 markerix_current = raweffects[i].markerix;
6483 e.
allele[i] = raweffects[i].allele;
6484 e.
eff[i] = raweffects[i].eff;
6485 e.
centre[markerix_current] += raweffects[i].centre * raweffects[i].eff;
6493 if (raweffects[i].markerix != markerix_current) {
6494 for (
GSC_GENOLEN_T j = markerix_current; j < raweffects[i].markerix; ++j) {
6497 markerix_current = raweffects[i].markerix;
6500 e.
allele[i] = raweffects[i].allele;
6501 e.
eff[i] = raweffects[i].eff;
6521 switch (c.
cell[0]) {
6543 if (c.
cell[0] ==
'm') {
6544 switch (c.
cell[1]) {
6562 if (c.
cell[1] ==
'/') {
6582 char*
pos = loc.localAM->alleles[loc.localPos] + 2*markerix;
6600 phase = rnd_pcg_range(&forrng->
rng,0,1);
6629 phase = rnd_pcg_range(&forrng->
rng,0,1);
6634 phase = rnd_pcg_range(&forrng->
rng,0,1);
6639 phase = rnd_pcg_range(&forrng->
rng,0,1);
6644 phase = rnd_pcg_range(&forrng->
rng,0,1);
6649 phase = rnd_pcg_range(&forrng->
rng,0,1);
6654 phase = rnd_pcg_range(&forrng->
rng,0,1);
6673 .alloctogroup = allocation_group,
6724 fprintf(stderr,
"EmptyListNavigator invalid\n");
6745 if (NULL == it->
d->
m) {
6749 while (NULL != listend->
next) {
6750 listend = listend->
next;
6787 const size_t firstrowlen,
6788 const size_t queuelen,
6790 const char* filenameforlog) {
6792 if (format.markers_as_rows ==
GSC_TRUE || format.markers_as_rows ==
GSC_FALSE) {
6794 }
else if (d->genome.n_maps == 0) {
6798 printf(
"(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns| (by assumption when no genetic map is loaded)\n", filenameforlog);
6799 printf(
"(Loading %s) No genetic map is loaded, will invent a map where all markers are unlinked/show independent assortment\n", filenameforlog);
6802 }
else if (format.has_header ==
GSC_FALSE) {
6803 printf(
"(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns| "
6804 "(by assumption when matrix has no header row)\n", filenameforlog);
6810 int firstsafeheaderindex = -1;
6811 if (firstrowlen > 1) {
6812 firstsafeheaderindex = 1;
6813 }
else if (firstrowlen == 1 && queuelen > firstrowlen + 1) {
6814 firstsafeheaderindex = 0;
6818 if (firstsafeheaderindex >= 0) {
6822 printf(
"(Loading %s) Format axis: genetic markers are |columns|, founder lines are -rows-\n", filenameforlog);
6829 if (queuelen > firstrowlen && !cellqueue[firstrowlen].eof &&
6831 printf(
"(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns|\n", filenameforlog);
6837 for (
size_t i = firstsafeheaderindex + 1; i < firstrowlen; ++i) {
6839 printf(
"(Loading %s) Format axis: genetic markers are |columns|, founder lines are -rows-\n", filenameforlog);
6847 printf(
"(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns| (by default file format)\n", filenameforlog);
6867 const size_t firstrowlen,
6868 const size_t queuelen,
6870 const char* filenameforlog) {
6872 _Bool style_detected = 0;
6873 _Bool single_col_file = 0;
6878 if (firstrowlen == queuelen || cellqueue[firstrowlen].eof) {
6880 if (firstrowlen > 1) {
6884 single_col_file = 1;
6888 if (firstrowlen + 1 < queuelen && cellqueue[firstrowlen+1].predNewline < 1) {
6892 single_col_file = 1;
6898 if (style_detected) {
6899 switch(format.cell_style) {
6902 case GSC_GENOTYPECELLSTYLE_COUNT: printf(
"(Loading %s) Allele format: reference allele counts (phase will be randomised)\n", filenameforlog);
break;
6903 case GSC_GENOTYPECELLSTYLE_ENCODED: printf(
"(Loading %s) Allele format: IUPAC encoded pair (phase will be randomised)\n", filenameforlog);
break;
6905 if (single_col_file || firstrowlen == queuelen ||
6906 (firstrowlen + 1 == queuelen && cellqueue[firstrowlen].eof && cellqueue[firstrowlen].cell_len == 0)) {
6907 printf(
"(Loading %s) Warning: empty genotype matrix. No genotypes will be loaded.\n", filenameforlog);
6909 fprintf(stderr,
"(Loading %s) Failure: Unable to determine the formatting of pairs of alleles."
6910 " Check genomicSimulation manual for accepted allele pair encodings\n", filenameforlog);
6947 const size_t firstrowlen,
6948 const size_t queuelen,
6950 const char* filenameforlog) {
6953 printf(
"(Loading %s) Failure: genetic markers cannot be represented by columns when matrix has no header row\n", filenameforlog);
6954 format.has_header =
GSC_NA;
6960 if (firstrowlen == 1) {
6964 if (cellqueue[2].eof || cellqueue[2].predNewline) {
6974 for (
size_t i = 1; i < firstrowlen; ++i) {
6982 switch (format.has_header) {
6983 case GSC_FALSE: printf(
"(Loading %s) Format: genotype matrix without header row\n", filenameforlog);
break;
6984 case GSC_TRUE: printf(
"(Loading %s) Format: genotype matrix with header row\n", filenameforlog);
break;
6985 default: fprintf(stderr,
"(Loading %s) Failure: Unable to determine whether file has header row\n", filenameforlog);
break;
7008 const size_t ncellsfirstrow,
7009 const size_t ncellssecondrow,
7010 const _Bool secondrowheaderisempty) {
7011 if (ncellssecondrow == ncellsfirstrow + 1) {
7013 }
else if (ncellssecondrow == ncellsfirstrow) {
7014 if (secondrowheaderisempty) {
7019 }
else if (ncellssecondrow == ncellsfirstrow - 1 && secondrowheaderisempty) {
7076 const char* filename,
7078 if (filename == NULL)
return NO_GROUP;
7080 fprintf(stderr,
"Non-genotype-matrix format specification provided to genotype matrix file loader function\n");
7090 size_t queuesize = 0;
7096 size_t ncellsread = 0;
7101 if (ncellsread >= cellsreadcap) {
7104 }
while (!cellsread[ncellsread-1].eof && (ncellsread <= 1 || !cellsread[ncellsread-1].predNewline));
7105 size_t ncellsfirstrow = (cellsread[ncellsread-1].eof && cellsread[ncellsread-1].cell_len > 0) ? ncellsread : ncellsread - 1;
7106 if (!cellsread[ncellsread-1].eof) {
7110 if (ncellsread >= cellsreadcap) {
7114 queuesize = ncellsread;
7115 if (ncellsread <= 1) {
7119 int is_onerow_file = ncellsread == ncellsfirstrow || cellsread[ncellsfirstrow].eof;
7134 while (!cellsread[ncellsread-1].eof && !cellsread[ncellsread-1].predNewline) {
7138 if (ncellsread >= cellsreadcap) {
7143 queuesize = ncellsread;
7144 size_t ncellssecondrow = ncellsread - ncellsfirstrow - 1;
7146 if (format_has_corner_cell ==
GSC_NA) {
7147 fprintf(stderr,
"(Loading %s) Failure: Header row length and second row length do not align\n", filename);
7156 cellqueue = cellsread + ncellsfirstrow;
7157 queuesize = ncellsread - ncellsfirstrow;
7161 _Bool build_map_from_rows = 0;
7164 build_map_from_rows = 1;
7173 }
while (!cell.
eof);
7186 fprintf(stderr,
"(Loading %s) Failure: Genotype matrix with markers as columns but no header row is an unsupported file type (there is no way to tell which column is which marker)\n", filename);
7190 size_t i = format_has_corner_cell ? 1 : 0;
7197 cellsread[i].isCellShallow =
GSC_TRUE;
7215 n_cols = (format_detected.
has_header) ? ncellsfirstrow + 1 : ncellsfirstrow;
7223 if (ncell.
cell != NULL) {
7227 if (build_map_from_rows) {
7228 ++nvalidmarker; have_valid_marker = 1;
7242 nvalidmarker += have_valid_marker;
7248 if (row == 1 && format_detected.
has_header) {
7249 if (column + 1 != ncellsfirstrow && column + 1 != ncellsfirstrow + 1) {
7250 fprintf(stderr,
"(Loading %s) Failure: Header row length and second row length do not align\n", filename);
7253 n_cols = column + 1;
7262 if (have_valid_marker && column < n_cols) {
7270 }
while (!ncell.
eof);
7271 if (row == 1 && format_detected.
has_header) {
7272 if (column + 1 != ncellsfirstrow && column + 1 != ncellsfirstrow + 1) {
7273 fprintf(stderr,
"(Loading %s) Failure: Header row length and second row length do not align\n", filename);
7276 n_cols = column + 1;
7283 size_t i = format_has_corner_cell ? 1 : 0;
7285 for (
size_t j = 0; i < ncellsfirstrow; ++i, ++j) {
7289 cellsread[i].isCellShallow =
GSC_TRUE;
7294 if (build_map_from_rows) {
7303 fprintf(stderr,
"(Loading %s) Failure: Genotype matrix with markers as columns but no header row is an unsupported file type (there is no way to tell which column is which marker)\n", filename);
7308 size_t i = format_has_corner_cell ? 1 : 0;
7309 size_t n_col = ncellsfirstrow + (1-i);
7325 if (ncell.
cell != NULL) {
7350 }
while (!ncell.
eof);
7359 while (tmpam != NULL) {
7361 tmpam = tmpam->next;
7363 printf(
"(Loading %s) %lu genotype(s) of %lu marker(s) were loaded.\n", filename,
7364 (
long unsigned int) ngenos, (
long unsigned int) nvalidmarker);
7374 for (
size_t j = 0; j < ncellsfirstrow; ++j) {
7375 if (!cellsread[j].isCellShallow) {
GSC_FREE(cellsread[j].cell); }
7385 for (
size_t i = 1; i <= queuesize; ++i) {
7386 if (!cellsread[ncellsread-i].isCellShallow) {
7387 GSC_FREE(cellsread[ncellsread-i].cell);
7388 cellsread[ncellsread-i].isCellShallow =
GSC_TRUE;
7393 for (
size_t j = 0; j < ncellsfirstrow; ++j) {
7394 if (!cellsread[j].isCellShallow) {
GSC_FREE(cellsread[j].cell); }
7420 const char* filename,
7444 const char* genotype_file,
7445 const char* map_file,
7446 const char* effect_file,
7453 char* suffix = strrchr(genotype_file,
'.');
7454 if (suffix != NULL) {
7455 if (strcmp(suffix,
".bed") == 0) {
7457 }
else if (strcmp(suffix,
".ped") == 0) {
7459 }
else if (strcmp(suffix,
".vcf") == 0) {
7470 fprintf(stderr,
"plink .bed file parsing not yet implemented\n");
7473 fprintf(stderr,
"plink .ped file parsing not yet implemented\n");
7476 fprintf(stderr,
"vcf file parsing not yet implemented\n");
7525 unsigned int p2num,
char* offspring,
int certain) {
7527 fprintf(stderr,
"Need at least one recombination map loaded to estimate recombinations\n");
7533 fprintf(stderr,
"We don't have that recombination maps loaded\n");
7540 int p1match, p2match;
7544 for (
int chr = 0; chr <
map.n_chr; ++chr) {
7547 switch (
map.chrs[chr].type) {
7548 case GSC_LINKAGEGROUP_SIMPLE:
7549 for (
int i = 0; i <
map.chrs[chr].map.simple.n_markers; ++i) {
7552 if (p1match && !p2match) {
7553 origins[
map.chrs[chr].map.simple.first_marker_index + i] = p1num;
7555 }
else if (p2match && !p1match) {
7556 origins[
map.chrs[chr].map.simple.first_marker_index + i] = p2num;
7560 origins[
map.chrs[chr].map.simple.first_marker_index + i] = 0;
7562 origins[
map.chrs[chr].map.simple.first_marker_index + i] = previous;
7568 case GSC_LINKAGEGROUP_REORDER:
7569 for (
int i = 0; i <
map.chrs[chr].map.reorder.n_markers; ++i) {
7572 if (p1match && !p2match) {
7573 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = p1num;
7575 }
else if (p2match && !p1match) {
7576 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = p2num;
7580 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7582 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = previous;
7634 unsigned int p2num,
char* offspring,
int window_size,
int certain) {
7636 fprintf(stderr,
"Need at least one recombination map loaded to estimate recombinations\n");
7642 fprintf(stderr,
"We don't have that recombination maps loaded\n");
7650 int p1match, p2match;
7651 int previous = 0, window_range = (window_size - 1)/2, i;
7653 for (
int chr = 0; chr <
map.n_chr; ++chr) {
7656 switch (
map.chrs[chr].type) {
7657 case GSC_LINKAGEGROUP_SIMPLE:
7658 for (i = 0; i < window_range; ++i) {
7659 origins[
map.chrs[chr].map.simple.first_marker_index + i] = 0;
7661 for (; i <
map.chrs[chr].map.simple.n_markers - window_range; ++i) {
7664 if (p1match && !p2match) {
7665 origins[
map.chrs[chr].map.simple.first_marker_index + i] = p1num;
7667 }
else if (p2match && !p1match) {
7668 origins[
map.chrs[chr].map.simple.first_marker_index + i] = p2num;
7672 origins[
map.chrs[chr].map.simple.first_marker_index + i] = 0;
7674 origins[
map.chrs[chr].map.simple.first_marker_index + i] = previous;
7678 for (; i <
map.chrs[chr].map.simple.n_markers; ++i) {
7679 origins[
map.chrs[chr].map.simple.first_marker_index + i] = 0;
7683 case GSC_LINKAGEGROUP_REORDER:
7684 for (i = 0; i < window_range; ++i) {
7685 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7687 for (; i <
map.chrs[chr].map.reorder.n_markers - window_range; ++i) {
7690 if (p1match && !p2match) {
7691 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = p1num;
7693 }
else if (p2match && !p1match) {
7694 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = p2num;
7698 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7700 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = previous;
7704 for (; i <
map.chrs[chr].map.reorder.n_markers; ++i) {
7705 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7751 int window_len,
int certain) {
7755 if ((fp = fopen(input_file,
"r")) == NULL) {
7756 fprintf(stderr,
"Failed to open file %s.\n", input_file); exit(1);
7759 if ((fpo = fopen(output_file,
"w")) == NULL) {
7760 fprintf(stderr,
"Failed to open file %s.\n", output_file); exit(1);
7769 char* combin_genes[3];
7773 for (
int i = 0; i < t.
num_rows; ++i) {
7775 fscanf(fp,
"%s %s %s \n", buffer[0], buffer[1], buffer[2]);
7779 if (combin_i[0] < 0 || combin_i[1] < 0 || combin_i[2] < 0) {
7780 fprintf(stderr,
"Genotypes at file %s line %lu could not be found\n", input_file, (
long unsigned int) i);
7787 if (window_len == 1) {
7796 fprintf(fpo,
"\n%s", buffer[0]);
7798 fprintf(fpo,
"\t%d", r[j]);
7804 fwrite(
"\n",
sizeof(
char), 1, fpo);
7841 const char* parent_genome,
7845 if (parent_genome == NULL) {
7846 fprintf(stderr,
"Could not generate this gamete: no parent provided\n");
7850 fprintf(stderr,
"Could not generate this gamete: invalid map provided\n");
7862 case GSC_LINKAGEGROUP_SIMPLE:
7865 case GSC_LINKAGEGROUP_REORDER:
7869 fprintf(stderr,
"Linkage group type of linkage group with index %lu of map with index %lu is corrupted\n",
7870 (
long unsigned int) chr, (
long unsigned int) map_index);
7875 if (num_crossovers > crossover_wherecap) {
7878 for (
int i = 0; i < num_crossovers; ++i) {
7879 crossover_where[i] = ((double)rand() / (double)RAND_MAX);
7881 if (num_crossovers > 1) {
7886 int which = rnd_pcg_range(&d->
rng,0,1);
7887 int up_to_crossover = 0;
7889 case GSC_LINKAGEGROUP_SIMPLE:
7892 while (up_to_crossover < num_crossovers &&
7901 case GSC_LINKAGEGROUP_REORDER:
7904 while (up_to_crossover < num_crossovers &&
7940 const char* parent_genome,
7948 if (parent_genome == NULL) {
7949 fprintf(stderr,
"Could not make this doubled haploid\n");
7953 fprintf(stderr,
"Could not generate this gamete: invalid map provided\n");
7965 case GSC_LINKAGEGROUP_SIMPLE:
7968 case GSC_LINKAGEGROUP_REORDER:
7972 fprintf(stderr,
"Linkage group type of group with index %lu of map with index %lu is corrupted\n",
7973 (
long unsigned int) chr, (
long unsigned int) map_index);
7978 if (num_crossovers > crossover_wherecap) {
7981 for (
int i = 0; i < num_crossovers; ++i) {
7982 crossover_where[i] = ((double)rand() / (double)RAND_MAX);
7984 if (num_crossovers > 1) {
7989 int which = rnd_pcg_range(&d->
rng,0,1);
7990 int up_to_crossover = 0;
7992 case GSC_LINKAGEGROUP_SIMPLE:
7995 while (up_to_crossover < num_crossovers &&
8001 output[2*pos] = parent_genome[2*pos + which];
8002 output[2*pos + 1] = output[2*pos];
8005 case GSC_LINKAGEGROUP_REORDER:
8008 while (up_to_crossover < num_crossovers &&
8014 output[2*pos] = parent_genome[2*pos + which];
8015 output[2*pos + 1] = output[2*pos];
8038 const char* parent_genome,
8041 output[2*j] = parent_genome[2*j];
8042 output[2*j + 1] = parent_genome[2*j + 1];
8056 strcpy(tmpname_p,
"out");
8058 strcat(tmpname_p,
"-pedigree.txt");
8059 fp = fopen(tmpname_p,
"w");
8080 strcpy(tmpname_b,
"out");
8082 strcat(tmpname_b,
"-bv.txt");
8083 fe = fopen(tmpname_b,
"w");
8098 strcpy(tmpname_g,
"out");
8100 strcat(tmpname_g,
"-genotype.txt");
8101 fg = fopen(tmpname_g,
"w");
8246 void* parentIterator,
8248 int (*parentChooser)(
void*,
8258 parentChooser == NULL || offspringGenerator == NULL) {
8273 while (last->
next != NULL) {
8287 while (parentChooser(parentIterator, datastore, &counter, parents)) {
8301 last->
next = offspring;
8310 offspringGenerator(d, datastore, parents, offspringPos);
8311 offspring->
groups[fullness] = output_group;
8331 last->
next = offspring;
8334 return output_group;
8358 if (*counter < datastore->rand.n_crosses &&
8372 if (datastore->
rand.
cap > 0) {
8373 datastore->
rand.
uses[parentixs[0]] += 1;
8374 datastore->
rand.
uses[parentixs[1]] += 1;
8422 fprintf(stderr,
"Group %lu does not exist\n", (
long unsigned int) from_group.
num);
8426 if (n_crosses < 1) {
8427 fprintf(stderr,
"Invalid n_crosses value provided: n_crosses must be greater than 0\n");
8432 fprintf(stderr,
"Invalid cap value provided: cap can't be negative\n");
8435 if (cap > 0 && cap*g_size < n_crosses) {
8436 fprintf(stderr,
"Invalid cap value provided: cap of %lu uses on %lu parents too small to make %lu crosses\n",
8437 (
long unsigned int) cap, (
long unsigned int) g_size, (
long unsigned int) n_crosses);
8478 }
else if (g_size == 1) {
8479 fprintf(stderr,
"Group %lu must contain multiple individuals to be able to perform random crossing\n",
8480 (
long unsigned int) from_group.
num);
8484 fprintf(stderr,
"Crossing requires at least one recombination map loaded\n");
8504 fprintf(stderr,
"Could not find recombination map with identifier %lu\n", (
long unsigned int) which_map.
id);
8547 if (max < 1 || (max == 1 && noCollision == 0)) {
8550 if (max > INT_MAX) {
8551 fprintf(stderr,
"Drawing a random number with a max of %lu is not supported on the C version"
8552 "with the rnd library. If the max is greater than %d, probabilistic uniformity may be lost"
8553 "or an infinite loop may occur.", (
long unsigned int) max, INT_MAX);
8559 parentix = rnd_pcg_range(&d->
rng,0,max - 1);
8560 }
while (parentix == noCollision || member_uses[parentix] >=
cap);
8563 parentix = rnd_pcg_range(&d->
rng,0,max - 1);
8564 }
while (parentix == noCollision);
8586 size_t parentixs[2] = { 0 };
8588 if (*counter < datastore->
rand_btwn.n_crosses &&
8670 fprintf(stderr,
"Crossing requires at least one recombination map loaded\n");
8701 fprintf(stderr,
"Could not find recombination map with identifier %lu\n", (
long unsigned int) map1.
id);
8708 fprintf(stderr,
"Could not find recombination map with identifier %lu\n", (
long unsigned int) map2.
id);
8743 while (*counter < datastore->
targeted.n_crosses) {
8797 const size_t n_combinations,
8803 if (n_combinations < 1) {
8804 fprintf(stderr,
"Invalid n_combinations value provided: n_combinations must be greater than 0\n");
8808 fprintf(stderr,
"Crossing requires at least one recombination map loaded\n");
8825 fprintf(stderr,
"Could not find recombination map with identifier %lu\n", (
long unsigned int) map1.
id);
8832 fprintf(stderr,
"Could not find recombination map with identifier %lu\n", (
long unsigned int) map2.
id);
8845 fprintf(stderr,
"Targeted crossing failed for %lu out of the %lu requested pairings due to one or both genotype indexes being invalid\n", (
long unsigned int) paramstore.
targeted.
bad_pairings, (
long unsigned int) n_combinations);
8870 parents[1] = parents[0];
8899 int n_oddness = n % 2;
8900 for (
unsigned int i = 0; i < n; ++i) {
8901 if (i % 2 == n_oddness) {
8904 tmpparent = tmpchild;
8938 const unsigned int n,
8948 fprintf(stderr,
"Invalid n value provided: Number of generations must be greater than 0\n");
8952 fprintf(stderr,
"Selfing requires at least one recombination map loaded\n");
8964 fprintf(stderr,
"Could not find recombination map with identifier %lu\n", (
long unsigned int) which_map.
id);
8991 parents[0].mapindex);
9023 fprintf(stderr,
"Crossing requires at least one recombination map loaded\n");
9033 fprintf(stderr,
"Could not find recombination map with identifier %lu\n", (
long unsigned int) which_map.
id);
9063 parents[1] = parents[0];
9166 fprintf(stderr,
"Group %lu does not have enough members to perform crosses\n", (
long unsigned int) from_group.
num);
9168 fprintf(stderr,
"Group %lu does not exist\n", (
long unsigned int) from_group.
num);
9186 combinations[0][cross_index] = group_indexes[i];
9187 combinations[1][cross_index] = group_indexes[j];
9202 fprintf(stderr,
"Function gsc_make_n_crosses_from_top_m_percent is deprecated."
9203 "It behaved unintuitively and goes against genomicSimulation principles on division of functionality\n");
9235 const char* input_file,
9241 fprintf(stderr,
"No crosses exist in that file\n");
9247 if ((fp = fopen(input_file,
"r")) == NULL) {
9248 fprintf(stderr,
"Failed to open file %s.\n", input_file); exit(1);
9257 for (
int filei = 0; filei < t.
num_rows; ++filei) {
9259 fscanf(fp,
"%s %s \n", buffer[0], buffer[1]);
9262 if (combinations[0][bufferi] < 0 || combinations[1][bufferi] < 0) {
9263 fprintf(stderr,
"Parents on file %s line %lu could not be found\n", input_file, (
long unsigned int) filei);
9309 const char* input_file,
9315 fprintf(stderr,
"No crosses exist in that file\n");
9321 if ((fp = fopen(input_file,
"r")) == NULL) {
9322 fprintf(stderr,
"Failed to open file %s.\n", input_file); exit(1);
9329 const char* to_buffer[] = {buffer[0], buffer[1], buffer[2], buffer[3]};
9335 fscanf(fp,
"%s %s %s %s \n", buffer[0], buffer[1], buffer[2], buffer[3]);
9338 fprintf(stderr,
"Could not go ahead with the line %lu cross - g0 names not in records\n",
9339 (
long unsigned int) i);
9348 if (f1_i[0] < 0 || f1_i[1] < 0) {
9352 if (f1_i[0] < 0 || f1_i[1] < 0) {
9355 if (f1_i[0] < 0 || f1_i[1] < 0) {
9356 fprintf(stderr,
"Could not go ahead with the line %lu cross - f1 children do not exist for this quartet\n",
9357 (
long unsigned int) i);
9366 combinations[0][i] = f1_i[0];
9367 combinations[1][i] = f1_i[1];
9400 const _Bool lowIsBest) {
9403 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effID.
id);
9408 if (group_size == 0) {
9409 fprintf(stderr,
"Group %lu does not exist\n", (
long unsigned int) group.
num);
9415 if (group_size <= top_n) {
9426 for (
size_t i = 0; i < fits.
dim2; i++) {
9427 p_fits[i] = &(fits.
matrix[0][i]);
9440 top_individuals[i] = group_indexes[p_fits[i] - fits.
matrix[0]];
9474 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effID.
id);
9501 if (targets == NULL || effset == NULL) {
9502 fprintf(stderr,
"Either targets or marker effects were not provided\n");
9512 if (n_genotypes >= sumcap) {
9516 sum[n_genotypes] = 0;
9524 double asum = ( (e.
allele[eix] == genotype[2*m]) +
9525 (e.
allele[eix] == genotype[2*m+1]) ) * e.
eff[eix];
9529 sum[n_genotypes] += msum;
9537 double summedcentres = 0.;
9539 summedcentres += e.
centre[m];
9543 sum[i] -= summedcentres;
9549 out.
dim2 = n_genotypes;
9571 const char allele) {
9579 if (n_genotypes >= countscap) {
9587 counts[n_genotypes][m] = (genotype[2*m] == allele) + (genotype[2*m+1] == allele);
9597 out.
dim1 = n_genotypes;
9637 fprintf(stderr,
"Creating blocks by chromosome length requires at least one recombination map loaded\n");
9643 fprintf(stderr,
"We don't have that recombination maps loaded. Using default map\n");
9649 fprintf(stderr,
"Invalid n value: number of blocks must be positive\n");
9652 if (map.
n_chr < 1) {
9653 fprintf(stderr,
"Map has no chromosomes, so it cannot be divided into blocks\n");
9671 case GSC_LINKAGEGROUP_SIMPLE:
9684 while (blockix - firstblockix < n-1 &&
9697 if (currentn >= temp_markers_in_blockcap) {
9713 case GSC_LINKAGEGROUP_REORDER:
9726 while (blockix - firstblockix < n-1 &&
9739 if (currentn >= temp_markers_in_blockcap) {
9792 if ((infile = fopen(block_file,
"r")) == NULL) {
9793 fprintf(stderr,
"Failed to open file %s.\n", block_file); exit(1);
9803 fscanf(infile,
"%*[^\n]\n");
9806 while (fscanf(infile,
"%*d %*f %*s %*s ") != EOF) {
9818 memset(markerbuffer, 0,
sizeof(*markerbuffer) * bufferlen);
9819 while ((c = fgetc(infile)) != EOF && c !=
'\n') {
9821 markername[ni] =
'\0';
9827 markerbuffer[mi] = markerindex;
9882 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effID.
id);
9934 if (hap1 >= bvscap) {
9952 _Bool gotallele1 = 0;
9953 _Bool gotallele2 = 0;
9957 if (!gotallele1 && e.
allele[eix] == genotype[2*markerix]) {
9958 bvs[hap1][j] += e.
eff[eix];
9961 if (!gotallele2 && e.
allele[eix] == genotype[2*markerix + 1]) {
9962 bvs[hap2][j] += e.
eff[eix];
9977 if (hap1 >= bvscap) {
9994 _Bool gotallele1 = 0;
9995 _Bool gotallele2 = 0;
9999 if (!gotallele1 && e.
allele[eix] == genotype[2*markerix]) {
10000 bvs[hap1][j] += e.
eff[eix];
10003 if (!gotallele2 && e.
allele[eix] == genotype[2*markerix + 1]) {
10004 bvs[hap2][j] += e.
eff[eix];
10008 bvs[hap1][j] -= e.
centre[markerix];
10009 bvs[hap2][j] -= e.
centre[markerix];
10020 out.
dim1 = 2*n_genotypes;
10047 const char symbol_na,
10048 char* opt_haplotype) {
10051 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effID.
id);
10063 best_allele = symbol_na;
10065 best_allele = e.
allele[e_ix];
10066 best_score = e.
eff[e_ix];
10070 if (e.
eff[e_ix] > best_score) {
10071 best_score = e.
eff[e_ix];
10072 best_allele = e.
allele[e_ix];
10076 opt_haplotype[m_ix] = best_allele;
10107 const char symbol_na,
10108 char* opt_haplotype) {
10111 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effID.
id);
10122 best_score[m] = NAN;
10123 opt_haplotype[m] = symbol_na;
10135 if (!checked[e_ix] && (genotype[2*m] == e.
allele[e_ix] ||
10136 genotype[2*m+1] == e.
allele[e_ix])) {
10138 double score = 2 * e.
eff[e_ix];
10139 if (isnan(best_score[m]) || score > best_score[m]) {
10140 best_score[m] = score;
10141 opt_haplotype[m] = e.
allele[e_ix];
10175 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effID.
id);
10180 double best_gebv = 0.;
10182 double best_score = 0;
10185 best_score = e.
eff[e_ix];
10189 if (e.
eff[e_ix] > best_score) {
10190 best_score = e.
eff[e_ix];
10194 best_gebv += (2*best_score);
10198 double summedcentres = 0.;
10200 summedcentres += e.
centre[m];
10202 best_gebv -= summedcentres;
10223 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effID.
id);
10233 best_score[m] = NAN;
10245 if (!checked[e_ix] && (genotype[2*m] == e.
allele[e_ix] ||
10246 genotype[2*m+1] == e.
allele[e_ix])) {
10248 double score = 2 * e.
eff[e_ix];
10249 if (isnan(best_score[m]) || score > best_score[m]) {
10250 best_score[m] = score;
10260 double optimal_bv = 0;
10262 if (!isnan(best_score[m])) {
10263 optimal_bv += best_score[m];
10268 double summedcentres = 0.;
10270 summedcentres += e.
centre[m];
10272 optimal_bv -= summedcentres;
10294 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effID.
id);
10299 double worst_gebv = 0.;
10301 double worst_score = 0;
10304 worst_score = e.
eff[e_ix];
10308 if (e.
eff[e_ix] < worst_score) {
10309 worst_score = e.
eff[e_ix];
10313 worst_gebv += (2*worst_score);
10317 double summedcentres = 0.;
10319 summedcentres += e.
centre[m];
10321 worst_gebv -= summedcentres;
10351 if ((f = fopen(fname,
"w")) == NULL) {
10352 fprintf(stderr,
"Failed to open file %s for writing output\n", fname);
return;
10385 const _Bool markers_as_rows) {
10387 if ((f = fopen(fname,
"w")) == NULL) {
10388 fprintf(stderr,
"Failed to open file %s for writing output\n", fname);
return;
10424 const _Bool markers_as_rows) {
10426 if ((f = fopen(fname,
"w")) == NULL) {
10427 fprintf(stderr,
"Failed to open file %s for writing output\n", fname);
return;
10434 markers_as_rows, allele);
10464 const _Bool full_pedigree) {
10466 if ((f = fopen(fname,
"w")) == NULL) {
10467 fprintf(stderr,
"Failed to open file %s for writing output\n", fname);
return;
10499 if ((f = fopen(fname,
"w")) == NULL) {
10500 fprintf(stderr,
"Failed to open file %s for writing output\n", fname);
return;
10505 fprintf(stderr,
"Marker effect set %lu does not exist: cannot calculate breeding values\n", (
long unsigned int) effID.
id);
return;
10545 const _Bool headers) {
10547 if ((f = fopen(fname,
"w")) == NULL) {
10548 fprintf(stderr,
"Failed to open file %s for writing output\n", fname);
return;
10564 int len = (name == NULL) ? 0 : strlen(name);
10568 strncpy(ghapnames[i], name,
sizeof(
char)*len);
10569 ghapnames[i][len] =
'_'; ghapnames[i][len+1] =
'1'; ghapnames[i][len+2] =
'\0';
10570 strncpy(ghapnames[i+1], name,
sizeof(
char)*len);
10571 ghapnames[i+1][len] =
'_'; ghapnames[i+1][len+1] =
'2'; ghapnames[i+1][len+2] =
'\0';
10574 if (i >= dec.
dim1) {
10580 for (; i < dec.
dim1; ++i) {
10581 ghapnames[i] = NULL;
10584 for (
size_t i = 0; i < dec.
dim1; ++i) {
10585 if (ghapnames[i] != NULL) {
10604 switch (chr.
type) {
10605 case GSC_LINKAGEGROUP_SIMPLE:
10617 case GSC_LINKAGEGROUP_REORDER:
10691 char**
const marker_names,
10697 const char header[] =
"Chrom\tLen\tMarkers\n";
10698 fwrite(header,
sizeof(
char)*strlen(header), 1, f);
10709 for (
GSC_GENOLEN_T chrix = 0; chrix < map->n_chr; ++chrix) {
10711 map->chrs[chrix],&minpos)) {
10717 map->chrs[chrix],&pos)) {
10718 maxpos = (pos > maxpos) ? pos : maxpos;
10719 minpos = (pos < minpos) ? pos : minpos;
10725 len = maxpos - minpos;
10731 if (isonchr >= 0) {
10732 fprintf(f,
"%lu\t%lf\t",(
long unsigned int)isonchr,len*100);
10734 const char colns[] =
"-\t-\t";
10735 fwrite(colns,
sizeof(
char)*strlen(colns), 1, f);
10742 if (k <= n_markers) {
10743 fwrite(marker_names[k],
sizeof(
char)*strlen(marker_names[k]), 1, f);
10745 fprintf(f,
"%lu",(
long unsigned int)k);
10750 fwrite(
"\n",
sizeof(
char), 1, f);
10766 char**
const marker_names,
10767 const _Bool markers_as_rows,
10768 void (*bodycell_printer)(FILE*,
10772 void* bodycell_printer_data) {
10777 fprintf(f,
"%lu",(
long unsigned int) targets->
group.
num);
10781 if (markers_as_rows) {
10784 if (targets != NULL) {
10787 fwrite(
"\t",
sizeof(
char), 1, f);
10791 fwrite(n,
sizeof(
char)*strlen(n), 1, f);
10793 fprintf(f,
"%lu", (
long unsigned int)
gsc_get_id(loc).
id);
10798 fwrite(
"\n",
sizeof(
char), 1, f);
10806 if (ntargets > 0 && ((row < n_markers || (ntargets > 0 && row < targets->cachedAM->n_markers)))) {
10808 if (genos != NULL) {
10815 while (row < n_markers || (ntargets > 0 && row < targets->cachedAM->n_markers)) {
10817 if (row < n_markers) {
10818 if (marker_names[row] != NULL) {
10819 fwrite(marker_names[row],
sizeof(
char)*strlen(marker_names[row]), 1, f);
10826 if (genos != NULL) {
10833 fwrite(
"\t",
sizeof(
char), 1, f);
10834 bodycell_printer(f,loc,row,bodycell_printer_data);
10837 fwrite(
"\n",
sizeof(
char), 1, f);
10840 if (genos != NULL) {
GSC_FREE(genos); }
10844 if (marker_names != NULL) {
10846 fwrite(
"\t",
sizeof(
char), 1, f);
10847 if (marker_names[i] != NULL) {
10848 fwrite(marker_names[i],
sizeof(
char)*strlen(marker_names[i]), 1, f);
10851 fwrite(
"\n",
sizeof(
char), 1, f);
10855 if (targets != NULL) {
10861 fwrite(n,
sizeof(
char)*strlen(n), 1, f);
10863 fprintf(f,
"%lu", (
long unsigned int)
gsc_get_id(loc).
id);
10868 fwrite(
"\t",
sizeof(
char), 1, f);
10869 bodycell_printer(f,loc,i,bodycell_printer_data);
10871 fwrite(
"\n",
sizeof(
char), 1, f);
10900 char allele = *(
char*) data;
10902 if (
get_alleles(loc)[2*markerix] == allele) { ++count; }
10903 if (
get_alleles(loc)[2*markerix + 1] == allele) { ++count; }
10904 char out =
'0' + count;
10905 fwrite(&out,
sizeof(
char), 1, f);
10959 char**
const marker_names,
10960 const _Bool markers_as_rows) {
11022 char**
const marker_names,
11023 const _Bool markers_as_rows,
11024 const char allele) {
11038 void (*strprinter)(
char*,
size_t,
void*),
11039 void (*intprinter)(
long unsigned int,
void*),
11040 void* printer_data) {
11044 strprinter(
"=(",
sizeof(
char)*2,printer_data);
11053 if (p1.
id == p2.
id) {
11057 if (name != NULL) {
11058 strprinter(name,
sizeof(
char)*strlen(name), printer_data);
11060 intprinter((
long unsigned int) p1.
id,printer_data);
11070 if (name != NULL) {
11071 strprinter(name,
sizeof(
char)*strlen(name),printer_data);
11073 intprinter((
long unsigned int) p1.
id,printer_data);
11080 strprinter(
",",
sizeof(
char),printer_data);
11084 if (name != NULL) {
11085 strprinter(name,
sizeof(
char)*strlen(name),printer_data);
11087 intprinter((
long unsigned int) p2.
id,printer_data);
11097 strprinter(
")",
sizeof(
char),printer_data);
11103 FILE* f = (FILE*) data;
11104 fwrite(str, strlen, 1, f);
11110 FILE* f = (FILE*) data;
11111 fprintf(f,
"%lu", i);
11175 const _Bool full_pedigree,
11178 if (targets == NULL) {
return; }
11181 switch (full_pedigree) {
11188 fwrite(n,
sizeof(
char)*strlen(n), 1, f);
11190 fprintf(f,
"%lu", (
long unsigned int)
gsc_get_id(loc).
id);
11194 for (
int parent = 0; parent < 2; ++parent) {
11195 fwrite(
"\t",
sizeof(
char), 1, f);
11202 fwrite(n,
sizeof(
char)*strlen(n), 1, f);
11204 fprintf(f,
"%lu", (
long unsigned int) p.
id);
11208 fwrite(
"\n",
sizeof(
char), 1, f);
11217 fprintf(f,
"%lu\t", (
long unsigned int)
gsc_get_id(loc).
id);
11220 fwrite(n,
sizeof(
char)*strlen(n), 1, f);
11226 && parent_pedigree_store != NULL) {
11232 fwrite(
"\n",
sizeof(
char), 1, f);
11266 if (targets == NULL || eff == NULL) {
return; }
11271 for (
size_t i = 0; i < bvs.
dim2; ++i) {
11273 fprintf(f,
"%lu", (
long unsigned int)
gsc_get_id(loc).
id);
11274 fwrite(
"\t",
sizeof(
char), 1, f);
11277 fwrite(n,
sizeof(
char), strlen(n), f);
11279 fwrite(
"\t",
sizeof(
char), 1, f);
11281 fwrite(
"\t\t",
sizeof(
char)*2, 1, f);
11284 fprintf(f,
"%lf", bvs.
matrix[0][i]);
11285 fwrite(
"\n",
sizeof(
char), 1, f);
11313 char** row_headers,
11314 char** col_headers,
11315 _Bool dim1_is_columns) {
11316 if (dec == NULL || dec->dim1 == 0 || dec->dim2 == 0) {
return; }
11319 size_t ncols = (dim1_is_columns) ? dec->dim1 : dec->dim2;
11320 fwrite(col_headers[0],
sizeof(
char), strlen(col_headers[0]), f);
11321 for (
size_t col = 1; col < ncols; ++col) {
11322 fwrite(
"\t",
sizeof(
char), 1, f);
11323 fwrite(col_headers[col],
sizeof(
char), strlen(col_headers[col]), f);
11325 fwrite(
"\n",
sizeof(
char), 1, f);
11328 if (dim1_is_columns) {
11329 for (
size_t row = 0; row < dec->dim2; ++row) {
11331 fwrite(row_headers[row],
sizeof(
char), strlen(row_headers[row]), f);
11332 fwrite(
"\t",
sizeof(
char), 1, f);
11335 fprintf(f,
"%lf",dec->matrix[0][row]);
11336 for (
size_t col = 1; col < dec->dim1; ++col) {
11337 fwrite(
"\t",
sizeof(
char), 1, f);
11338 fprintf(f,
"%lf",dec->matrix[col][row]);
11340 fwrite(
"\n",
sizeof(
char), 1, f);
11343 for (
size_t row = 0; row < dec->dim1; ++row) {
11345 fwrite(row_headers[row],
sizeof(
char), strlen(row_headers[row]), f);
11346 fwrite(
"\t",
sizeof(
char), 1, f);
11349 fprintf(f,
"%lf",dec->matrix[row][0]);
11350 for (
size_t col = 1; col < dec->dim2; ++col) {
11351 fwrite(
"\t",
sizeof(
char), 1, f);
11352 fprintf(f,
"%lf",dec->matrix[row][col]);
11354 fwrite(
"\n",
sizeof(
char), 1, f);
double gsc_calculate_optimal_possible_bv(const gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID)
Calculates the breeding value of the highest breeding-value genotype that can be created from the all...
gsc_DecimalMatrix gsc_calculate_utility_local_bvs(gsc_BidirectionalIterator *targets, gsc_MarkerBlocks b, gsc_MarkerEffects e)
Calculate local haplotype block breeding values for a set of genotypes.
gsc_DecimalMatrix gsc_calculate_allele_counts(const gsc_SimData *d, const gsc_GroupNum group, const char allele)
Calculates the number of times at each marker that a particular allele appears.
gsc_DecimalMatrix gsc_calculate_bvs(const gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID)
Calculate the fitness metric/breeding value for every genotype in the simulation or every genotype in...
gsc_MarkerBlocks gsc_create_evenlength_blocks_each_chr(const gsc_SimData *d, const gsc_MapID mapid, const unsigned int n)
Divide the genotype into blocks where each block contains all markers within a 1/n length section of ...
gsc_GroupNum gsc_split_by_bv(gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID, const unsigned int top_n, const _Bool lowIsBest)
Takes the top_n individuals in the group with the best breeding values/fitnesses and puts them in a n...
void gsc_calculate_optimal_haplotype(const gsc_SimData *d, const gsc_EffectID effID, const char symbol_na, char *opt_haplotype)
Create a string containing the allele at each marker with the highest contributions to the additive b...
gsc_DecimalMatrix gsc_calculate_utility_bvs(gsc_BidirectionalIterator *targets, const gsc_MarkerEffects *effset)
Calculate the fitness metric/breeding value for a set of genotypes.
void gsc_calculate_optimal_possible_haplotype(const gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID, const char symbol_na, char *opt_haplotype)
Calculates the highest-breeding-value haplotype that can be created from the alleles present in a giv...
gsc_DecimalMatrix gsc_calculate_local_bvs(const gsc_SimData *d, const gsc_GroupNum group, const gsc_MarkerBlocks b, const gsc_EffectID effID)
Calculate local breeding values for every genotype in the simulation or every genotype in a certain g...
double gsc_calculate_minimal_bv(const gsc_SimData *d, const gsc_EffectID effID)
Calculate the lowest possible breeding value any (diploid) genotype could score using this set of mar...
gsc_MarkerBlocks gsc_load_blocks(const gsc_SimData *d, const char *block_file)
Given a file containing definitions of blocks of markers, process that file and return a struct conta...
double gsc_calculate_optimal_bv(const gsc_SimData *d, const gsc_EffectID effID)
Calculate the highest possible breeding value any (diploid) genotype could score using this set of ma...
unsigned int gsc_get_group_genes(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, char **output)
Gets a shallow copy of the genes/alleles of each member of the group.
unsigned int gsc_get_group_parent_ids(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, const int whichParent, gsc_PedigreeID *output)
Gets the ids of either the first or second parent of each member of the group.
unsigned int gsc_get_group_parent_names(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, const int whichParent, char **output)
Gets the names of either the first or second parent of each member of the group.
unsigned int gsc_get_group_pedigrees(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, char **output)
Gets the full pedigree string (as per gsc_save_group_full_pedigree() ) of each member of the group.
unsigned int gsc_get_group_names(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, char **output)
Gets a shallow copy of the names of each member of the group.
unsigned int gsc_get_group_indexes(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, unsigned int *output)
Gets the 0-based global indexes of each member of the group.
unsigned int gsc_get_group_ids(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, gsc_PedigreeID *output)
Gets the ids of each member of the group.
unsigned int gsc_get_group_bvs(const gsc_SimData *d, const gsc_GroupNum group_id, const gsc_EffectID effID, unsigned int group_size, double *output)
Gets the breeding values/fitnesses of each member of the group.
size_t gsc_get_existing_group_counts(gsc_SimData *d, gsc_GroupNum *out_groups, unsigned int *out_sizes)
Identify group numbers that currently have members, and how many members they have.
size_t gsc_get_existing_groups(gsc_SimData *d, gsc_GroupNum *output)
Identify group numbers that currently have members.
unsigned int gsc_get_group_size(const gsc_SimData *d, const gsc_GroupNum group_id)
Function to count the number of genotypes that currently belong to the specified group.
gsc_GroupNum gsc_make_double_crosses_from_file(gsc_SimData *d, const char *input_file, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Perform crosses between previously-generated offspring of pairs of parents identified by name in a fi...
gsc_GroupNum gsc_make_random_crosses(gsc_SimData *d, const gsc_GroupNum from_group, const unsigned int n_crosses, const unsigned int cap, const gsc_MapID which_map, const gsc_GenOptions g)
Performs random crosses among members of a group.
gsc_GroupNum gsc_make_crosses_from_file(gsc_SimData *d, const char *input_file, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Perform crosses between pairs of parents identified by name in a file and allocate the resulting offs...
gsc_GroupNum gsc_make_doubled_haploids(gsc_SimData *d, const gsc_GroupNum group, const gsc_MapID which_map, const gsc_GenOptions g)
Creates a doubled haploid from each member of a group.
gsc_GroupNum gsc_make_clones(gsc_SimData *d, const gsc_GroupNum group, const _Bool inherit_names, gsc_GenOptions g)
Creates an identical copy of each member of a group.
gsc_GroupNum gsc_make_random_crosses_between(gsc_SimData *d, const gsc_GroupNum group1, const gsc_GroupNum group2, const unsigned int n_crosses, const unsigned int cap1, const unsigned int cap2, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Performs random crosses where the first parent comes from one group and the second from another.
gsc_GroupNum gsc_scaffold_make_new_genotypes(gsc_SimData *d, const gsc_GenOptions g, void *parentIterator, union gsc_datastore_make_genotypes *datastore, int(*parentChooser)(void *, union gsc_datastore_make_genotypes *, unsigned int *, gsc_ParentChoice[static 2]), void(*offspringGenerator)(gsc_SimData *, union gsc_datastore_make_genotypes *, gsc_ParentChoice[static 2], gsc_GenoLocation))
Make new genotypes (generic function)
gsc_GroupNum gsc_make_all_unidirectional_crosses(gsc_SimData *d, const gsc_GroupNum from_group, const gsc_MapID mapID, const gsc_GenOptions g)
Perform crosses between all pairs of parents in the group from_group and allocates the resulting offs...
gsc_GroupNum gsc_self_n_times(gsc_SimData *d, const unsigned int n, const gsc_GroupNum group, const gsc_MapID which_map, const gsc_GenOptions g)
Selfs each member of a group for a certain number of generations.
gsc_GroupNum gsc_make_targeted_crosses(gsc_SimData *d, const size_t n_combinations, const unsigned int *firstParents, const unsigned int *secondParents, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Performs the crosses of pairs of parents whose indexes are provided in an array.
void gsc_delete_label(gsc_SimData *d, const gsc_LabelID which_label)
Clears memory of this label from the simulation and all its genotypes.
void gsc_delete_simdata(gsc_SimData *m)
Deletes a gsc_SimData object and frees its memory.
void gsc_delete_eff_set(gsc_SimData *d, gsc_EffectID effID)
Deletes a particular set of marker effects from memory.
void gsc_delete_effects_table(gsc_MarkerEffects *m)
Deletes a gsc_MarkerEffects object and frees its memory.
void gsc_delete_randomaccess_iter(gsc_RandomAccessIterator *it)
Deletes a gsc_RandomAccessIterator object and frees its memory.
void gsc_delete_recombination_map(gsc_SimData *d, const gsc_MapID which_map)
Deletes a particular recombination map from memory.
void gsc_delete_bidirectional_iter(gsc_BidirectionalIterator *it)
Deletes a gsc_BidirectionalIterator object.
void gsc_delete_markerblocks(gsc_MarkerBlocks *b)
Delete a gsc_MarkerBlocks struct.
void gsc_delete_dmatrix(gsc_DecimalMatrix *m)
Deletes a gsc_DecimalMatrix and frees its memory.
void gsc_delete_allele_matrix(gsc_AlleleMatrix *m)
Delete the gsc_AlleleMatrix linked list from m onwards and frees its memory.
void gsc_delete_recombination_map_nointegrity(gsc_RecombinationMap *m)
Deletes and clears the memory of a gsc_RecombinationMap struct.
void gsc_delete_group(gsc_SimData *d, const gsc_GroupNum group_id)
Deletes all genotypes belonging to a particular group.
void gsc_move_genotype(gsc_GenoLocation from, gsc_GenoLocation to, int *label_defaults)
Move all details of the genotype at one gsc_GenoLocation to another gsc_GenoLocation.
void gsc_delete_genome(gsc_KnownGenome *g)
Deletes and clears the memory of a gsc_KnownGenome object and its children.
size_t gsc_split_into_buckets(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, const unsigned int *counts, gsc_GroupNum *results)
Split a group into n groups of equal size (or size differing only by one, if n does not perfectly div...
size_t gsc_split_by_probabilities(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, const double *probs, gsc_GroupNum *results)
Allocate each member of the group to one of n groups with custom probabilities for each group.
size_t gsc_scaffold_split_by_somequality(gsc_SimData *d, const gsc_GroupNum group_id, void *somequality_data, gsc_GroupNum(*somequality_tester)(gsc_GenoLocation, void *, size_t, size_t, gsc_GroupNum *), size_t maxentries_results, gsc_GroupNum *results)
Split by some quality (generic function)
gsc_GroupNum gsc_split_evenly_into_two(gsc_SimData *d, const gsc_GroupNum group_id)
Split a group into two groups of equal size (or size differing only by one, if the original group had...
gsc_GroupNum gsc_split_by_label_range(gsc_SimData *d, const gsc_GroupNum group, const gsc_LabelID whichLabel, const int valueLowBound, const int valueHighBound)
Allocates the genotypes with values of a label in a particular range to a new group.
size_t gsc_split_into_individuals(gsc_SimData *d, const gsc_GroupNum group_id, size_t maxentries_results, gsc_GroupNum *results)
Split a group into n one-member groups.
size_t gsc_split_into_halfsib_families(gsc_SimData *d, const gsc_GroupNum group_id, const int parent, size_t maxentries_results, gsc_GroupNum *results)
Split a group into families of half-siblings by shared first or second parent.
size_t gsc_split_evenly_into_n(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, gsc_GroupNum *results)
Split a group into n groups of equal size (or size differing only by one, if n does not perfectly div...
size_t gsc_scaffold_split_by_someallocation(gsc_SimData *d, const gsc_GroupNum group_id, void *someallocator_data, gsc_GroupNum(*someallocator)(gsc_GenoLocation, gsc_SimData *, void *, size_t, size_t *, gsc_GroupNum *), size_t n_outgroups, gsc_GroupNum *outgroups)
Split by some allocator (generic function)
gsc_GroupNum gsc_combine_groups(gsc_SimData *d, const size_t list_len, const gsc_GroupNum *grouplist)
Combine a set of groups into one group.
gsc_GroupNum gsc_split_by_label_value(gsc_SimData *d, const gsc_GroupNum group, const gsc_LabelID whichLabel, const int valueToSplit)
Allocates the genotypes with a particular value of a label to a new group.
gsc_GroupNum gsc_split_randomly_into_two(gsc_SimData *d, const gsc_GroupNum group_id)
Flip a coin for each member of the group to decide if it should be moved to the new group.
gsc_GroupNum gsc_make_group_from(gsc_SimData *d, const size_t index_list_len, const unsigned int *genotype_indexes)
Take a list of indexes and allocate the genotypes at those indexes to a new group.
size_t gsc_split_randomly_into_n(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, gsc_GroupNum *results)
Allocate each member of the group to one of n groups with equal probability.
size_t gsc_split_into_families(gsc_SimData *d, const gsc_GroupNum group_id, size_t maxentries_results, gsc_GroupNum *results)
Split a group into families by their pedigrees.
gsc_BidirectionalIterator gsc_create_bidirectional_iter(gsc_SimData *d, const gsc_GroupNum group)
Create a bidirectional iterator.
gsc_AlleleMatrix * gsc_get_nth_AlleleMatrix(gsc_AlleleMatrix *listStart, const unsigned int n)
Get a gsc_AlleleMatrix by index in the linked list.
#define GSC_INVALID_GENO_LOCATION
Constant representing a nonexistent location in the simulation.
gsc_BidirectionalIterator gsc_create_bidirectional_iter_fromAM(gsc_AlleleMatrix *am, const gsc_GroupNum group)
gsc_GenoLocation gsc_next_forwards(gsc_BidirectionalIterator *it)
Get the next location from a bidirectional iterator.
gsc_GenoLocation gsc_set_bidirectional_iter_to_end(gsc_BidirectionalIterator *it)
Initialise a Bidirectional iterator to the end of its sequence.
gsc_RandomAccessIterator gsc_create_randomaccess_iter(gsc_SimData *d, const gsc_GroupNum group)
Create a Random Access Iterator.
gsc_GenoLocation gsc_set_bidirectional_iter_to_start(gsc_BidirectionalIterator *it)
Initialise a Bidirectional iterator to the start of its sequence.
gsc_GenoLocation gsc_next_backwards(gsc_BidirectionalIterator *it)
Get the previous location from a bidirectional iterator.
gsc_GenoLocation gsc_next_get_nth(gsc_RandomAccessIterator *it, const unsigned int n)
Get a location by index using a gsc_RandomAccessIterator.
#define GSC_IS_VALID_LOCATION(g)
Check if a GenoLocation is valid (that is, not INVALID_GENO_LOCATION).
static gsc_PedigreeID gsc_get_id(const gsc_GenoLocation loc)
Get the persistent id of a genotype.
static char * gsc_get_name(const gsc_GenoLocation loc)
Get the name of a genotype.
static int gsc_get_label_value(const gsc_GenoLocation loc, const int labelIndex)
Get the value of a specific label of a genotype.
static char * gsc_get_alleles(const gsc_GenoLocation loc)
Get the alleles of a genotype.
static void gsc_set_group(const gsc_GenoLocation loc, const gsc_GroupNum group)
Set the current group membership of a genotype.
static gsc_PedigreeID gsc_get_first_parent(const gsc_GenoLocation loc)
Get the first/left parent of a genotype.
static gsc_PedigreeID gsc_get_second_parent(const gsc_GenoLocation loc)
Get the second/right parent of a genotype.
static void gsc_set_name(const gsc_GenoLocation loc, char *name)
Set the name of a genotype.
static gsc_GroupNum gsc_get_group(const gsc_GenoLocation loc)
Get the current group membership of a genotype.
gsc_AlleleMatrix * gsc_create_empty_allelematrix(const unsigned int n_markers, const unsigned int n_labels, const int *labelDefaults, const unsigned int n_genotypes)
Creator for an empty gsc_AlleleMatrix object of a given size.
gsc_EffectID gsc_load_effectfile(gsc_SimData *d, const char *filename)
Populates a gsc_SimData object with effect values.
struct gsc_MultiIDSet gsc_load_data_files(gsc_SimData *d, const char *genotype_file, const char *map_file, const char *effect_file, const gsc_FileFormatSpec format)
Populates a gsc_SimData object with marker allele data, a genetic map, and (optionally) marker effect...
gsc_GroupNum gsc_load_genotypefile(gsc_SimData *d, const char *filename, const gsc_FileFormatSpec format)
Load a set of genotypes to a gsc_SimData object.
gsc_MapID gsc_create_uniformspaced_recombmap(gsc_SimData *d, unsigned int n_markers, char **markernames, double expected_n_recombinations)
Create a uniformly-spaced gsc_RecombinationMap from a list of marker names and save to SimData.
void gsc_clear_simdata(gsc_SimData *d)
Clear a gsc_SimData object on the heap.
gsc_MapID gsc_create_unlinked_recombmap(gsc_SimData *d, unsigned int n_markers, char **markernames)
Create a gsc_RecombinationMap with independent assortment of alleles across a list of marker names,...
gsc_SimData * gsc_create_empty_simdata(unsigned int RNGseed)
Creator for an empty gsc_SimData object on the heap.
gsc_MapID gsc_load_mapfile(gsc_SimData *d, const char *filename)
Load a genetic map to a gsc_SimData object.
gsc_MapID gsc_create_recombmap_from_markerlist(gsc_SimData *d, unsigned int n_markers, struct gsc_MapfileUnit *markerlist)
Parse a list of markers/chrs/positions into a gsc_RecombinationMap and save to SimData.
int gsc_randpoi(rnd_pcg_t *rng, double lambda)
Randomly generates a number from the Poisson distribution with parameter lambda, using the Knuth appr...
gsc_DecimalMatrix gsc_generate_zero_dmatrix(const size_t r, const size_t c)
Generates a matrix of r rows and c columns with all entries set to 0.
void gsc_generate_clone(gsc_SimData *d, const char *parent_genome, char *output)
Get an identical copy of a given genotype.
void gsc_generate_doubled_haploid(gsc_SimData *d, const char *parent_genome, char *output, const unsigned int map_index)
Get the alleles of the outcome of producing a doubled haploid from a gamete from a given parent.
void gsc_generate_gamete(gsc_SimData *d, const char *parent_genome, char *output, const unsigned int map_index)
Fills a char* with the simulated result of meiosis (reduction and recombination) from the marker alle...
static int gsc_has_same_alleles_window(const char *g1, const char *g2, const size_t start, const size_t w)
Simple operator to determine if at markers with indexes i to i+w inclusive, two genotypes share at le...
int gsc_calculate_recombinations_from_file(gsc_SimData *d, const char *input_file, const char *output_file, int window_len, int certain)
Provides guesses as to the location of recombination events that led to the creation of certain genot...
int * gsc_calculate_min_recombinations_fw1(gsc_SimData *d, gsc_MapID mapid, char *parent1, unsigned int p1num, char *parent2, unsigned int p2num, char *offspring, int certain)
Identify markers in the genotype of offspring where recombination from its parents occurred.
static int gsc_has_same_alleles(const char *p1, const char *p2, const size_t i)
Simple operator to determine if at marker i, two genotypes share at least one allele.
int * gsc_calculate_min_recombinations_fwn(gsc_SimData *d, gsc_MapID mapid, char *parent1, unsigned int p1num, char *parent2, unsigned int p2num, char *offspring, int window_size, int certain)
Identify markers in the genotype of offspring where recombination from its parents occurred,...
void gsc_save_utility_markerblocks(FILE *f, const gsc_MarkerBlocks b, const unsigned int n_markers, char **const marker_names, const gsc_RecombinationMap *map)
Prints the markers contained in a set of blocks to a file.
void gsc_save_utility_dmatrix(FILE *f, gsc_DecimalMatrix *dec, char **row_headers, char **col_headers, _Bool dim1_is_columns)
Output the contents of a matrix to a file.
void gsc_save_utility_allele_counts(FILE *f, gsc_BidirectionalIterator *targets, unsigned int n_markers, char **const marker_names, const _Bool markers_as_rows, const char allele)
Prints allele counts of simulated genotypes to a file.
void gsc_save_utility_pedigrees(FILE *f, gsc_BidirectionalIterator *targets, const _Bool full_pedigree, const gsc_AlleleMatrix *parent_pedigree_store)
Prints pedigrees to a file.
void gsc_save_markerblocks(const char *fname, const gsc_SimData *d, const gsc_MarkerBlocks b, const gsc_MapID labelMapID)
Prints the markers contained in a set of blocks to a file.
void gsc_save_bvs(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const gsc_EffectID effID)
Prints breeding values of genotypes in the simulation to a file.
void gsc_save_local_bvs(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const gsc_MarkerBlocks b, const gsc_EffectID effID, const _Bool headers)
Prints local breeding values of candidates to a file.
void gsc_save_allele_counts(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const char allele, const _Bool markers_as_rows)
Prints allele counts of genotypes from the simulation to a file.
void gsc_save_genotypes(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const _Bool markers_as_rows)
Prints genotypes from the simulation to a file.
void gsc_save_utility_bvs(FILE *f, gsc_BidirectionalIterator *targets, const gsc_MarkerEffects *eff)
Calculate and print breeding values to a file.
void gsc_save_utility_genotypes(FILE *f, gsc_BidirectionalIterator *targets, unsigned int n_markers, char **const marker_names, const _Bool markers_as_rows)
Prints simulated genotypes to a file.
void gsc_save_pedigrees(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const _Bool full_pedigree)
Prints pedigrees of genotypes in the simulation to a file.
char * gsc_get_genes_of_index(const gsc_AlleleMatrix *start, const unsigned int index)
Get the alleles of a genotype by its index.
char * gsc_get_name_of_id(const gsc_AlleleMatrix *start, const gsc_PedigreeID id)
Returns the name of the genotype with a given id.
void gsc_get_ids_of_names(const gsc_AlleleMatrix *start, const size_t n_names, const char **names, gsc_PedigreeID *output)
Search for genotypes with certain names in a linked list of gsc_AlleleMatrix and save the ids of thos...
unsigned int gsc_get_index_of_child(const gsc_AlleleMatrix *start, const gsc_PedigreeID parent1id, const gsc_PedigreeID parent2id)
Search for a genotype with parentage matching two given parent ids in a linked list of gsc_AlleleMatr...
unsigned int gsc_get_index_of_name(const gsc_AlleleMatrix *start, const char *name)
Search for a genotype with a particular name in a linked list of gsc_AlleleMatrix,...
gsc_PedigreeID gsc_get_id_of_index(const gsc_AlleleMatrix *start, const unsigned int index)
Get the id of a genotype by its index.
int gsc_get_parents_of_id(const gsc_AlleleMatrix *start, const gsc_PedigreeID id, gsc_PedigreeID output[static 2])
Saves the ids of the parents of a genotype with a particular id to the output array output.
#define delete_bidirectional_iter
#define IS_VALID_LOCATION
#define INVALID_GENO_LOCATION
#define RandomAccessIterator
#define GSC_FINALISE_BUFFER(n, as, nentries)
Macro to convert a stretchy buffer to a solid heap vector.
gsc_GenotypeFileCellStyle
Represent possible representations of alleles at a marker in a genotype file.
gsc_TableFileReader gsc_tablefilereader_create(const char *filename)
Open a file for reading with gsc_TableFileReader.
#define GSC_NO_LABEL
Empty/null value for custom label identifiers.
#define GSC_STRETCH_BUFFER(n, newlen)
Macro to expand the capacity of a stretchy buffer.
#define GSC_NO_MAP
Empty/null value for recombination map identifiers.
void gsc_tablefilecell_deep_copy(gsc_TableFileCell *c)
Allocate memory to store a deep copy of a gsc_TableFileCell, if previously only a shallow copy.
void gsc_tablefilereader_close(gsc_TableFileReader *tbl)
Close a gsc_TableFileReader's file pointer.
#define GSC_DELETE_BUFFER(n)
Macro to delete a stretchy buffer.
#define GSC_NO_EFFECTSET
Empty/null value for effect set identifiers.
enum gsc_TableFileCurrentStatus gsc_helper_tablefilereader_classify_char(gsc_TableFileReader *tbl)
Classify the character under the cursor of a TableFileReader as cell contents or otherwise.
gsc_TableFileCell gsc_tablefilereader_get_next_cell(gsc_TableFileReader *tbl)
Read forwards in TableFileReader and return the next cell's contents, as well as how many column gaps...
gsc_TableFileCurrentStatus
Represent possible states of the cursor of a gsc_TableFileReader.
#define GSC_CREATE_BUFFER(n, type, length)
Macro to create a stretchy buffer of any type and some length.
const gsc_GenOptions GSC_BASIC_OPT
Default parameter values for GenOptions, to help with quick scripts and prototypes.
gsc_FileFormatSpec gsc_define_matrix_format_details(const GSC_LOGICVAL has_header, const GSC_LOGICVAL markers_as_rows, const enum gsc_GenotypeFileCellStyle cell_style)
Give genomicSimulation hints on the format of a genotype matrix file to be loaded.
void gsc_helper_tablefilereader_refill_buffer(gsc_TableFileReader *tbl)
Read another buffer's worth of characters from a gsc_TableFileReader's file.
#define GSC_NO_PEDIGREE
Empty/null value for pedigree fields.
gsc_GenotypeFileType
Enumerate types of genotype files that the simulation knows how to load.
#define GSC_NO_GROUP
Empty/null value for group allocations.
@ GSC_GENOTYPECELLSTYLE_SLASHPAIR
@ GSC_GENOTYPECELLSTYLE_PAIR
@ GSC_GENOTYPECELLSTYLE_UNKNOWN
@ GSC_GENOTYPECELLSTYLE_ENCODED
@ GSC_GENOTYPECELLSTYLE_COUNT
@ GSC_TABLEFILE_ERROR_EOF
@ GSC_TABLEFILE_ERROR_EOBUF
@ GSC_TABLEFILE_COLUMNGAP
@ GSC_GENOTYPEFILE_MATRIX
Either a marker-by-line matrix, where each marker is a row, or a line-by-marker matrix,...
@ GSC_GENOTYPEFILE_UNKNOWN
gsc_GroupNum gsc_get_next_free_group_num(const size_t n_existing_groups, const gsc_GroupNum *existing_groups, size_t *cursor, gsc_GroupNum previous)
Iterator to get the next currently-free group number.
unsigned int gsc_get_from_ordered_pedigree_list(const gsc_PedigreeID target, const unsigned int listLen, const gsc_PedigreeID *list)
Binary search through list of unsigned integers.
gsc_GroupNum gsc_get_new_group_num(gsc_SimData *d)
Function to identify the next sequential integer that does not identify a group that currently has me...
void gsc_change_label_to(gsc_SimData *d, const gsc_GroupNum whichGroup, const gsc_LabelID whichLabel, const int setTo)
Set the values of a custom label.
struct gsc_TableSize gsc_get_file_dimensions(const char *filename, const char sep)
Opens a table file and reads the number of columns and rows (including headers) separated by sep into...
int gsc_get_integer_digits(const int i)
Count and return the number of digits in i.
unsigned int gsc_get_index_of_map(const gsc_SimData *d, const gsc_MapID map)
Function to identify the lookup index of a recombination map identifier.
unsigned int gsc_get_index_of_label(const gsc_SimData *d, const gsc_LabelID label)
Function to identify the label lookup index of a label identifier.
gsc_MapID gsc_get_new_map_id(const gsc_SimData *d)
Function to identify the next sequential integer that is not already allocated to a map ID in the sim...
void gsc_shuffle_up_to(rnd_pcg_t *rng, void *sequence, const size_t item_size, const size_t total_n, const size_t n_to_shuffle)
Produce a random ordering of the first n elements in an array using a (partial) Fisher-Yates shuffle.
void gsc_change_label_default(gsc_SimData *d, const gsc_LabelID whichLabel, const int newDefault)
Set the default value of a custom label.
size_t gsc_get_from_ordered_str_list(const char *target, const size_t listLen, const char **list)
Binary search through a list of strings.
unsigned int gsc_change_eff_set_centre_of_allele_count(gsc_SimData *d, const gsc_EffectID effset, const unsigned int n_markers, const char **marker_names, const double *centres, const char allele, const _Bool reset_centres)
Replace the centring values of specific markers in an effect set.
_Bool gsc_change_eff_set_centres_to_values(gsc_SimData *d, const gsc_EffectID effset, const unsigned int n_values, const double *values)
Replace the centring values of all markers in an effect set.
size_t gsc_get_from_unordered_str_list(const char *target, const size_t listLen, const char **list)
Linear search through a list of strings.
unsigned int gsc_change_eff_set_centre_of_markers(gsc_SimData *d, const gsc_EffectID effset, const unsigned int n_markers, const char **marker_names, const double *centres)
Replace the centring values of specific markers in an effect set.
gsc_EffectID gsc_get_new_eff_set_id(const gsc_SimData *d)
Function to identify the next sequential integer that is not already allocated to a marker effect set...
void gsc_get_n_new_group_nums(gsc_SimData *d, const size_t n, gsc_GroupNum *result)
Function to identify the next n sequential integers that do not identify a group that currently has m...
void gsc_change_label_to_values(gsc_SimData *d, const gsc_GroupNum whichGroup, const unsigned int startIndex, const gsc_LabelID whichLabel, const size_t n_values, const int *values)
Copy a vector of integers into a custom label.
gsc_LabelID gsc_get_new_label_id(const gsc_SimData *d)
Function to identify the next sequential integer that is not already allocated to a label in the simu...
unsigned int gsc_get_index_of_eff_set(const gsc_SimData *d, const gsc_EffectID eff_set_id)
Function to identify the lookup index of a marker effect set identifier.
gsc_LabelID gsc_create_new_label(gsc_SimData *d, const int setTo)
Initialises a new custom label.
void gsc_change_names_to_values(gsc_SimData *d, const gsc_GroupNum whichGroup, const unsigned int startIndex, const size_t n_values, const char **values)
Copy a vector of strings into the genotype name field.
_Bool gsc_get_index_of_genetic_marker(const char *target, gsc_KnownGenome g, unsigned int *out)
Return whether or not a marker name is present in the tracked markers, and at what index.
void gsc_change_allele_symbol(gsc_SimData *d, const char *which_marker, const char from, const char to)
Replace all occurrences of a given allele with a different symbol representation.
void gsc_change_label_by_amount(gsc_SimData *d, const gsc_GroupNum whichGroup, const gsc_LabelID whichLabel, const int byValue)
Increment the values of a custom label.
void gsc_condense_allele_matrix(gsc_SimData *d)
A function to tidy the internal storage of genotypes after addition or deletion of genotypes in the g...
unsigned int gsc_randomdraw_replacementrules(gsc_SimData *d, unsigned int max, unsigned int cap, unsigned int *member_uses, unsigned int noCollision)
Randomly pick a number in a range, optionally with a cap on how many times a number can be picked,...
static gsc_GroupNum gsc_helper_split_by_allocator_knowncounts(gsc_GenoLocation loc, gsc_SimData *d, void *datastore, size_t n_outgroups, size_t *subgroupsfound, gsc_GroupNum *outgroups)
static void gsc_helper_genoptions_save_bvs(FILE *fe, gsc_MarkerEffects *effMatrices, unsigned int effIndex, gsc_AlleleMatrix *tosave)
save-as-you-go (breeding values)
static gsc_GroupNum gsc_helper_split_by_quality_halfsib2(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
static int gsc_helper_parentchooser_cross_randomly(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_random_crosses.
static void gsc_helper_genoptions_save_genotypes(FILE *fg, gsc_AlleleMatrix *tosave)
save-as-you-go (genotypes/alleles)
static struct gsc_EmptyListNavigator gsc_create_emptylistnavigator(gsc_SimData *d, gsc_GroupNum allocation_group)
Create a new gsc_EmptyListNavigator, including an empty AlleleMatrix suitable for inserting into the ...
static gsc_EffectID gsc_helper_insert_eff_set_into_simdata(gsc_SimData *d, gsc_MarkerEffects effset)
Save a set of MarkerEffects to the SimData and allocate it an EffectID.
static FILE * gsc_helper_genoptions_save_pedigrees_setup(const gsc_GenOptions g)
Opens file for writing save-as-you-go pedigrees in accordance with gsc_GenOptions.
static void gsc_helper_output_genotypematrix_cell(FILE *f, gsc_GenoLocation loc, unsigned int markerix, void *GSC_NA)
Kernel for gsc_scaffold_save_genotype_info, when the goal is to save the (phased) allele pairs of eac...
static void gsc_helper_make_offspring_doubled_haploids(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for gsc_make_doubled_haploids.
static int gsc_helper_parentchooser_cross_targeted(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_targeted_crosses.
static int gsc_helper_parentchooser_cross_randomly_between(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_random_crosses_between.
static FILE * gsc_helper_genoptions_save_genotypes_setup(const gsc_SimData *d, const gsc_GenOptions g)
Opens file for writing save-as-you-go genotypes in accordance with gsc_GenOptions.
static gsc_MapID gsc_helper_insert_recombmap_into_simdata(gsc_SimData *d, gsc_RecombinationMap map)
Save a RecombinationMap to the SimData and allocate it a mapID.
static void gsc_set_names(gsc_AlleleMatrix *a, const char *prefix, const int suffix, const unsigned int from_index)
Fills the designated section of the .names array in a gsc_AlleleMatrix with the pattern "`prefix`ind...
static struct gsc_GenotypeFile_MatrixFormat gsc_helper_genotypefile_matrix_detect_orientation(const gsc_SimData *d, const gsc_TableFileCell *cellqueue, const size_t firstrowlen, const size_t queuelen, struct gsc_GenotypeFile_MatrixFormat format, const char *filenameforlog)
Determine whether a genotype matrix is row- or column-oriented.
static void gsc_scaffold_save_genotype_info(FILE *f, gsc_BidirectionalIterator *targets, unsigned int n_markers, char **const marker_names, const _Bool markers_as_rows, void(*bodycell_printer)(FILE *, gsc_GenoLocation, unsigned int, void *), void *bodycell_printer_data)
Prints a matrix of genotype information to a file.
static int gsc_helper_ascending_pdouble_comparer(const void *pp0, const void *pp1)
Comparator function for qsort.
static struct gsc_GenotypeFile_MatrixFormat gsc_helper_genotypefile_matrix_detect_header(const gsc_TableFileCell *cellqueue, const size_t firstrowlen, const size_t queuelen, struct gsc_GenotypeFile_MatrixFormat format, const char *filenameforlog)
Determine whether a genotype matrix has a header row or not.
static void gsc_helper_make_offspring_self_n_times(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for gsc_self_n_times.
static GSC_LOGICVAL gsc_helper_parse_ncell_header(int ncellrow1, gsc_TableFileCell *unprocessedqueue, int ncell_required, const char **titles_required, int ncell_optional, const char **titles_optional, int *col_order)
Header row reading and processing for map and effect set files.
static int gsc_helper_descending_pdouble_comparer(const void *pp0, const void *pp1)
Comparator function for qsort.
static int gsc_helper_ascending_double_comparer(const void *pp0, const void *pp1)
Comparator function for qsort.
static void gsc_helper_ancestry_intprinter_file(long unsigned int i, void *data)
Kernel for scaffold functions that require printing an integer to a file (as opposed to saving the in...
static gsc_GenoLocation gsc_emptylistnavigator_get_first(struct gsc_EmptyListNavigator *it)
Reset the cursor of a gsc_EmptyListNavigator to the first genotype.
static void gsc_emptylistnavigator_finaliselist(struct gsc_EmptyListNavigator *it)
Push emptylist edited genotypes into the SimData.
static gsc_GenoLocation gsc_emptylistnavigator_get_next(struct gsc_EmptyListNavigator *it)
Get the next sequential genotype in a gsc_EmptyListNavigator.
static int gsc_helper_mapfileunit_ascending_d_comparer(const void *p0, const void *p1)
Comparator function for qsort.
static gsc_GroupNum gsc_helper_split_by_quality_individuate(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
static gsc_GenoLocation gsc_nextgappy_valid_pos(struct gsc_GappyIterator *it)
Sets the current cursor position in a gsc_GappyIterator to the next valid position,...
static gsc_GroupNum gsc_helper_split_by_quality_halfsib1(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
static GSC_LOGICVAL gsc_helper_is_marker_in_chr(const unsigned int markerix, const gsc_LinkageGroup chr, double *pos)
Check if a marker index is found in a particular LinkageGroup, and provide its distance along the chr...
static void gsc_helper_genotypecell_to_allelematrix(gsc_GenoLocation loc, unsigned int markerix, enum gsc_GenotypeFileCellStyle style, char *cell, gsc_SimData *forrng)
Parse a string and save it as the alleles of a genotype at a particular location and genetic marker.
static void gsc_scaffold_save_ancestry_of(const gsc_AlleleMatrix *m, gsc_PedigreeID p1, gsc_PedigreeID p2, void(*strprinter)(char *, size_t, void *), void(*intprinter)(long unsigned int, void *), void *printer_data)
Identifies and saves (recursively) the pedigree of a pair of parents.
static gsc_TableFileCell gsc_helper_tablefilereader_get_next_cell_wqueue(gsc_TableFileReader *tf, gsc_TableFileCell **queue, size_t *queuesize)
Return the next cell from a queue of cells until the queue is exhausted, and thereafter read new cell...
static int gsc_helper_parentchooser_cloning(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_clones.
static gsc_GroupNum gsc_helper_split_by_quality_halfsibtemplate(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results, gsc_PedigreeID(*getparent)(gsc_GenoLocation))
static gsc_GroupNum gsc_helper_split_by_allocator_equalprob(gsc_GenoLocation loc, gsc_SimData *d, void *datastore, size_t n_outgroups, size_t *subgroupsfound, gsc_GroupNum *outgroups)
static void gsc_helper_make_offspring_cross(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for all crossing functions
static void gsc_helper_genoptions_save_pedigrees(FILE *fp, gsc_SimData *d, gsc_AlleleMatrix *tosave)
save-as-you-go (pedigrees)
static int gsc_helper_mapfileunit_ascending_chr_comparer(const void *p0, const void *p1)
Comparator function for qsort.
static enum gsc_GenotypeFileCellStyle gsc_helper_genotype_matrix_identify_cell_style(gsc_TableFileCell c)
Identify what formatting a genotype matrix is representing alleles as.
static void * gsc_malloc_wrap(const size_t size, char exitonfail)
Replace direct calls to malloc with this function.
static int gsc_helper_parentchooser_selfing(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_self_n_times.
static void gsc_helper_make_offspring_clones(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for gsc_make_clones.
gsc_GroupNum gsc_make_n_crosses_from_top_m_percent(gsc_SimData *d, const int n, const int m, const gsc_GroupNum group, const gsc_MapID mapID, const gsc_EffectID effID, const gsc_GenOptions g)
static void gsc_helper_genoptions_give_names_and_ids(gsc_AlleleMatrix *am, gsc_SimData *d, const gsc_GenOptions g)
Apply gsc_GenOptions naming scheme and gsc_PedigreeID allocation to a single gsc_AlleleMatrix.
static FILE * gsc_helper_genoptions_save_bvs_setup(const gsc_SimData *d, const gsc_GenOptions g, unsigned int *effIndexp)
Opens file for writing save-as-you-go breeding values in accordance with gsc_GenOptions.
static int gsc_helper_read_first_row(gsc_TableFileReader *tf, int min_headerlen, int max_headerlen, gsc_TableFileCell *outputq, size_t *queuesize)
Save the first row of cells from a table file to a queue.
static void gsc_helper_sort_markerlist(unsigned int n_markers, struct gsc_MapfileUnit *markerlist)
Sort markerlist by chromosome name, and by position within each chromosome.
static struct gsc_GenotypeFile_MatrixFormat gsc_helper_genotypefile_matrix_detect_cellstyle(const gsc_TableFileCell *cellqueue, const size_t firstrowlen, const size_t queuelen, struct gsc_GenotypeFile_MatrixFormat format, const char *filenameforlog)
Determine the style in which alleles are stored in a genotype matrix.
static int gsc_helper_effectfileunit_ascending_mix_comparer(const void *p0, const void *p1)
Comparator function for qsort.
static void gsc_helper_output_countmatrix_cell(FILE *f, gsc_GenoLocation loc, unsigned int markerix, void *data)
Kernel for gsc_scaffold_save_genotype_info, when the goal is to save the allele counts of a particula...
static gsc_GroupNum gsc_load_genotypefile_matrix(gsc_SimData *d, const char *filename, const gsc_FileFormatSpec format)
Loads a genotype file, with or without existing genome model in the SimData.
static int gsc_helper_indirect_alphabetical_str_comparer(const void *p0, const void *p1)
Comparator function for qsort.
static unsigned int gsc_helper_str_markerlist_leftjoin(gsc_KnownGenome g, unsigned int n_markers_in_list, struct gsc_MapfileUnit **markerlist)
Discard markers whose names are not present in a gsc_KnownGenome.
static size_t gsc_helper_parse_mapfile(const char *filename, struct gsc_MapfileUnit **out)
Extract the contents of a genetic map file.
static gsc_GenoLocation gsc_nextgappy_get_gap(struct gsc_GappyIterator *it)
Sets the current cursor position in a gsc_GappyIterator to the next empty position,...
static GSC_LOGICVAL gsc_helper_genotypefile_matrix_detect_cornercell_presence(const size_t ncellsfirstrow, const size_t ncellssecondrow, const _Bool secondrowheaderisempty)
Determine whether a genotype matrix has a corner cell or not.
static unsigned int gsc_helper_random_cross_checks(gsc_SimData *d, const gsc_GroupNum from_group, const unsigned int n_crosses, const unsigned int cap)
Check input parameters of random crossing functions.
static gsc_GenoLocation gsc_nextgappy_get_nongap(struct gsc_GappyIterator *it)
Sets the current cursor position in a gsc_GappyIterator to the next filled position,...
static void gsc_helper_ancestry_strprinter_file(char *str, size_t strlen, void *data)
Kernel for scaffold functions that require printing a string to a file (as opposed to saving the stri...
static gsc_GroupNum gsc_helper_split_by_allocator_unequalprob(gsc_GenoLocation loc, gsc_SimData *d, void *datastore, size_t n_outgroups, size_t *subgroupsfound, gsc_GroupNum *outgroups)
static gsc_GroupNum gsc_helper_split_by_quality_family(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
#define GSC_ID_T
genomicSimulation's "ID" type
#define GSC_NA_IDX
When accessing the current array index of a unique session ID, the "ID not found"/failure value is -1...
#define GSC_GLOBALX_T
genomicSimulation's "Candidate global index" type
#define GSC_GENOLEN_T
genomicSimulation's "Genotype length" type
GSC_LOGICVAL
genomicSimulation's "logical value" type
#define GSC_NA_LOCALX
For candidate local indexes, the INVALID/UNINITIALISED value is -1 (for signed types) or the maximum ...
#define GSC_LOCALX_T
genomicSimulation's "Candidate local index" type
#define GSC_NA_ID
For unique session IDs, the INVALID/UNINITIALISED value is 0.
#define GSC_NA_GLOBALX
For candidate global indexes, the INVALID/UNINITIALISED value is -1 (for signed types) or the maximum...
char * names[1000]
Array of dynamically allocated strings containing the names of the lines/genotypes in this matrix.
unsigned int n_genotypes
Number of genotypes currently loaded in this matrix.
unsigned int n_markers
Number of markers across which genotypes are tracked.
char * alleles[1000]
A matrix of SNP markers by lines/genotypes containing pairs of alleles eg TT, TA.
unsigned int n_labels
Number of custom labels currently available to this gsc_AlleleMatrix.
gsc_AlleleMatrix * next
Pointer to the next gsc_AlleleMatrix in the linked list, or NULL if this entry is the last.
int ** labels
Pointer to list of labels.
gsc_PedigreeID ids[1000]
Unique ID for each genotype.
gsc_GroupNum groups[1000]
Group allocation of each genotype.
gsc_PedigreeID pedigrees[2][1000]
Two lists of integer IDs of the parents of this genotype (if tracked), or 0 if we don't know/care.
A structure to iterate forwards and backwards through all genotypes in a gsc_SimData or through only ...
_Bool atEnd
Boolean that is TRUE if the iterator's 'cursor' is on the last genotype (genotype with the highest in...
unsigned int cachedAMIndex
Index of cachedAM in the linked list of gsc_AlleleMatrix beginning at d->m.
const gsc_GroupNum group
Group through which to iterate.
gsc_AlleleMatrix * cachedAM
Pointer to the gsc_AlleleMatrix from the linked list of gsc_AlleleMatrix beginning at d->m where the ...
_Bool atStart
Boolean that is TRUE if the iterator's 'cursor' is on the first genotype (genotype with the lowest in...
unsigned int localPos
Local index (index within the cachedAM) of the genotype in the linked list of gsc_AlleleMatrix beginn...
gsc_AlleleMatrix * am
Simulation genotypes through which to iterate.
A row-major heap matrix that contains floating point numbers.
size_t dim1
Number of rows in the matrix.
double ** matrix
The actual matrix and contents.
size_t dim2
Number of columns in the matrix.
A type representing a particular loaded set of marker effects.
Unprocessed data for one marker effect loaded from an effect file.
A structure to hold an initially empty AlleleMatrix list whose genotypes can be accessed sequentially...
gsc_GroupNum alloctogroup
gsc_AlleleMatrix * firstAM
gsc_AlleleMatrix * localAM
A structure to iterate forwards through all positions in the gsc_AlleleMatrix linked list in gsc_SimD...
unsigned int cursorAMIndex
A type that contains choices of settings for gsc_SimData functions that create a new gsc_AlleleMatrix...
_Bool will_allocate_ids
A boolean: whether to allocate generated offspring session- unique IDs.
_Bool will_track_pedigree
A boolean: whether to track parentage of generated offspring.
_Bool will_name_offspring
A boolean: whether generated offspring should be given names.
const char * filename_prefix
A string used in save-as-you-go file names.
const char * offspring_name_prefix
If will_name_offspring is true, generated offspring are named with the concatenation {offspring_name_...
gsc_EffectID will_save_bvs_to_file
If equal to NO_EFFECTSET, no bvs are calculated or saved.
_Bool will_save_pedigree_to_file
A boolean.
unsigned int family_size
The number of offspring to produce from each cross.
_Bool will_save_to_simdata
A boolean.
_Bool will_save_alleles_to_file
A boolean.
A coordinate (a gsc_AlleleMatrix plus an index within that gsc_AlleleMatrix) of a particular genotype in the simulation.
gsc_AlleleMatrix * localAM
Pointer to the gsc_AlleleMatrix in which the genotype can be found.
unsigned int localPos
Index in the localAM where the genotype can be found (min value: 0).
A type representing the identifier of a group of genotypes.
A type that stores the genome structure used in simulation.
char ** marker_names
A vector of n_markers strings containing the names of markers, ordered according to their index in an...
gsc_RecombinationMap * maps
A vector of n_maps recombination maps, to use for simulating meiosis.
char *** names_alphabetical
A vector of n_markers pointers to names in marker_names, ordered in alphabetical order of the names.
unsigned int n_markers
The total number of markers.
unsigned int n_maps
The number of recombination maps currently stored.
gsc_MapID * map_ids
A vector of n_maps identifiers for each of the recombination maps currently stored.
A type representing a particular custom label.
A generic store for a linkage group, used to simulate meiosis on a certain subset of markers.
gsc_ReorderedLinkageGroup reorder
enum gsc_LinkageGroup::gsc_LinkageGroupType type
gsc_SimpleLinkageGroup simple
union gsc_LinkageGroup::@6 map
A type representing a particular loaded recombination map.
Unprocessed data for one marker (linkage group and position) loaded from a map file.
A struct used to store a set of blocks of markers.
unsigned int num_blocks
The number of blocks whose details are stored here.
unsigned int * num_markers_in_block
Pointer to a heap array of length num_blocks containing the number of markers that make up each block...
unsigned int ** markers_in_block
Pointer to a heap array of length num_blocks, each entry in which is a pointer to a heap array with l...
A type that stores the information needed to calculate breeding values from alleles at markers.
unsigned int * cumn_alleles
A vector of length n_markers holding the cumulative number of alleles that have effects on breeding v...
double * eff
A vector holding the effect on breeding value of each allele at each marker.
unsigned int n_markers
Number of markers across which genotypes are tracked.
double * centre
Vector of length n_markers, containing a value for each marker which represents the value to subtract...
char * allele
A vector holding the symbol/character representing each allele at each marker.
Simple crate that stores a GroupNum, a MapID, and an EffectID.
gsc_GenoLocation loc
Location in the simulation where this parent is stored.
unsigned int mapindex
Index in d->genome.maps of the recombination map to use when producing gametes from this parent.
A type representing a program-lifetime-unique identifier for a genotype, to be used in tracking pedig...
A structure to search and cache indexes of all genotypes in a gsc_SimData or of all the members of a ...
unsigned int cacheSize
Length of cache, measured in gsc_GenoLocations.
const gsc_GroupNum group
Group through which to iterate.
unsigned int largestCached
Local/group index (that is, index in cache) of the highest cell in cache that has been filled.
gsc_SimData * d
Simulation data through which to iterate.
unsigned int groupSize
If the number of genotypes in the simulation that fulfil the iterator's group criteria is known,...
gsc_GenoLocation * cache
Array iteratively updated with the known genotypes in the simulation that fulfil the group criteria o...
A type that stores linkage groups and crossover probabilities for simulating meiosis.
size_t n_chr
The number of chromosomes/linkage groups represented in the map.
gsc_LinkageGroup * chrs
Vector of n_chr recombination maps, one for each chromosome/linkage group in this recombination map.
char ** chr_names
An identifying code for each chromosome/linkage group in the map.
unsigned int n_markers
The number of markers in this chromosome/linkage group.
double expected_n_crossovers
Expected value of the Poisson distribution from which the number of crossovers in this linkage group ...
unsigned int * marker_indexes
Array with n_markers entries.
double * dists
Array with n_markers entries, containing at position i the distance in centimorgans along the linkage...
Composite type that is used to run crossing simulations.
unsigned int n_labels
The number of custom labels in the simulation.
gsc_KnownGenome genome
A gsc_KnownGenome, which stores the information of known markers and linkage groups,...
gsc_LabelID * label_ids
The identifier number of each label in the simulation, in order of their lookup index.
gsc_EffectID * eff_set_ids
The identifier number of each set of allele effects in the simulation, ordered by their lookup index.
int * label_defaults
Array containing the default (birth) value of each custom label.
unsigned int n_groups
Number of groups currently existing in simulation.
unsigned int n_eff_sets
The number of sets of allele effects in the simulation.
gsc_PedigreeID current_id
Highest SimData-unique ID that has been generated so far.
rnd_pcg_t rng
Random number generator working memory.
gsc_AlleleMatrix * m
Pointer to a gsc_AlleleMatrix, which stores data and metadata of founders and simulated offspring.
gsc_MarkerEffects * e
Array of n_eff_sets gsc_MarkerEffects, optional for the use of the simulation.
unsigned int first_marker_index
The index of the first marker in this chromosome/linkage group in the simulation's corresponding gsc_...
unsigned int n_markers
The number of markers in this chromosome/linkage group.
double * dists
Array with n_markers entries, containing at position i the distance in centimorgans along the linkage...
double expected_n_crossovers
Expected value of the Poisson distribution from which the number of crossovers in this linkage group ...
Represent a cell read by a gsc_TableFileReader.
int predCol
since last read, how many column gaps have there been?
char * cell
deep copy of the cell contents, or NULL
_Bool isCellShallow
is the string in 'cell' a shallow copy or deep copy?
int predNewline
since last read, how many newlines have there been?
_Bool eof
are we (this cell) at end of file
size_t cell_len
length of cell contents (because a shallow copy may not be null-terminated)
Stream reader for files of some tabular format.
int buf_fill
Number of characters from the file that are currently loaded in buf.
char buf[8192]
A window of characters from the file, loaded into memory for current processing.
int cursor
Index in buf of the first character that the file reader has not yet parsed.
FILE * fp
File being read.
struct gsc_datastore_make_genotypes::@5 clones
unsigned int n_gens_selfing
struct gsc_datastore_make_genotypes::@3 selfing
unsigned int bad_pairings
struct gsc_datastore_make_genotypes::@1 rand_btwn
struct gsc_datastore_make_genotypes::@0 rand
unsigned int * first_parents
unsigned int * second_parents
struct gsc_datastore_make_genotypes::@2 targeted
struct gsc_datastore_make_genotypes::@4 doub_haps