12 .offspring_name_prefix = NULL,
16 .filename_prefix = NULL,
36 fprintf(stderr,
"0 memory allocation requested.\n");
42 fprintf(stderr,
"Memory allocation failed. Exiting.\n"); exit(2);
44 fprintf(stderr,
"Memory allocation failed.\n");
64 const int* labelDefaults,
74 memset(m->
alleles[i], 0,
sizeof(
char) * (n_markers<<1));
81 for (
GSC_ID_T i = 0; i < n_labels; ++i) {
84 m->
labels[i][j] = labelDefaults[i];
87 }
else if (n_labels == 0) {
90 fprintf(stderr,
"Invalid negative number of labels provided to gsc_create_empty_allelematrix");
126 rnd_pcg_seed( &d->
rng, RNGseed );
203 double target = exp(-lambda);
204 double p = rnd_pcg_nextf(rng);
207 p *= rnd_pcg_nextf(rng);
241 if ((fp = fopen(filename,
"r")) == NULL) {
242 fprintf(stderr,
"Failed to open file %s.\n", filename); exit(1);
246 while (c != EOF && c !=
'\n') {
267 if (has_length && sep_count != details.
num_columns-1) {
271 fprintf(stderr,
"Bad columns on row %d\n", details.
num_rows + 1); exit(1);
276 }
else if (c == sep) {
395 while (list[index].
id != target.
id && first <= last) {
399 if (index+lookahead <= last && list[index+lookahead].
id !=
GSC_NO_PEDIGREE.id) {
400 if (list[index+lookahead].
id == target.
id) {
401 return index+lookahead;
402 }
else if (list[index+lookahead].
id < target.
id) {
403 first = index+lookahead + 1;
409 }
else if (index-lookahead <= last && list[index-lookahead].
id !=
GSC_NO_PEDIGREE.id) {
410 if (list[index-lookahead].
id == target.
id) {
411 return index-lookahead;
412 }
else if (list[index-lookahead].
id < target.
id) {
416 last = index-lookahead - 1;
421 if (index+lookahead <= last || index-lookahead >= first) {
428 if (list[index].
id == target.
id) {
430 }
else if (list[index].
id < target.
id) {
438 index = (first + last) / 2;
464 const size_t listLen,
466 for (
size_t i = 0; i < listLen; ++i) {
467 if (strcmp(list[i], target) == 0) {
491 const size_t listLen,
493 size_t first = 0, last = listLen - 1;
494 size_t index = (first + last) / 2;
495 int comparison = strcmp(target,list[index]);
496 while (comparison != 0 && first <= last) {
497 if (comparison == 0) {
499 }
else if (comparison < 0) {
506 index = (first + last) / 2;
507 comparison = strcmp(target, list[index]);
537 const size_t item_size,
538 const size_t total_n,
539 const size_t n_to_shuffle) {
540 if (n_to_shuffle > 1) {
543 void* tmp = &tmp_spot;
544 if (item_size >
sizeof(tmp_spot)) {
548 size_t maxi = total_n > n_to_shuffle ? n_to_shuffle - 1 : total_n - 1;
550 for (i = 0; i <= maxi; ++i) {
552 size_t j = i + rnd_pcg_range(rng,0,total_n - i - 1);
555 memcpy(&tmp, sequence + j*item_size, item_size);
556 memcpy(sequence + j*item_size, sequence + i*item_size, item_size);
557 memcpy(sequence + i*item_size, &tmp, item_size);
560 if (item_size >
sizeof(tmp_spot)) {
586 if (prefix == NULL) {
592 sprintf(format,
"%s%s", prefix, sname);
594 int livingsuffix = suffix;
598 if (a->
names[i] != NULL) {
603 sprintf(sname, format, livingsuffix);
605 strcpy(a->
names[i], sname);
656 memset(new_label_defaults, 0,
sizeof(
int) * d->
n_labels);
658 new_label_defaults[d->
n_labels] = setTo;
662 fprintf(stderr,
"Labels malformed; gsc_SimData may be corrupted\n");
687 int** oldLabelList = m->
labels;
690 m->
labels[i] = oldLabelList[i];
700 m->
labels[newLabel][i] = setTo;
719 }
else if (!warned) {
720 fprintf(stderr,
"Unable to create new label for all genotypes; gsc_SimData may be corrupted\n");
724 }
while ((m = m->
next) != NULL);
741 const int newDefault) {
744 fprintf(stderr,
"Nonexistent label %lu\n", (
long unsigned int) whichLabel.
id);
771 fprintf(stderr,
"Nonexistent label %lu\n", (
long unsigned int) whichLabel.
id);
783 m->
labels[labelIndex][i] = setTo;
787 }
while ((m = m->
next) != NULL);
796 m->
labels[labelIndex][i] = setTo;
800 }
while ((m = m->
next) != NULL);
827 fprintf(stderr,
"Nonexistent label %lu\n", (
long unsigned int) whichLabel.
id);
839 m->
labels[labelIndex][i] += byValue;
843 }
while ((m = m->
next) != NULL);
849 m->
labels[labelIndex][i] += byValue;
852 }
while ((m = m->
next) != NULL);
884 const size_t n_values,
888 fprintf(stderr,
"Nonexistent label %lu\n", (
long unsigned int) whichLabel.
id);
901 if (currentIndex >= startIndex) {
902 m->
labels[labelIndex][i] = values[currentIndex - startIndex];
905 if (currentIndex > startIndex && currentIndex - startIndex >= n_values) {
911 }
while ((m = m->
next) != NULL);
918 if (currentIndex >= startIndex) {
919 m->
labels[labelIndex][i] = values[currentIndex - startIndex];
922 if (currentIndex > startIndex && currentIndex - startIndex >= n_values) {
927 }
while ((m = m->
next) != NULL);
961 const size_t n_values,
962 const char** values) {
974 if (currentIndex >= startIndex) {
976 if (m->
names[i] != NULL) {
983 strcpy(m->
names[i], values[whichName]);
986 if (currentIndex > n_values) {
992 }
while ((m = m->
next) != NULL);
999 if (currentIndex >= startIndex) {
1001 if (m->
names[i] != NULL) {
1007 const int nameLen = strlen(values[whichName]);
1009 strncpy(m->
names[i], values[whichName], nameLen);
1012 if (currentIndex > n_values) {
1017 }
while ((m = m->
next) != NULL);
1042 const char* which_marker,
1047 unsigned int nalleles = 0;
1050 if (which_marker == NULL) {
1106 printf(
"Changed allele %c to %c %lu times across %lu markers and %lu genotypes\n",
1107 from, to, (
long unsigned int)nalleles, (
long unsigned int)nmarkers, (
long unsigned int)ngenos);
1116 int digits = 0, ii = i;
1132 double d0 = **(
double **)pp0;
1133 double d1 = **(
double **)pp1;
1148 double d0 = *(
double *)pp0;
1149 double d1 = *(
double *)pp1;
1165 double d0 = **(
double **)pp0;
1166 double d1 = **(
double **)pp1;
1178 char* str1 = **(
char***)p0;
1179 char* str2 = **(
char***)p1;
1180 return strcmp(str1,str2);
1219 int* label_defaults) {
1224 fprintf(stderr,
"In moving a genotype from %p:%lu to %p:%lu, the genotype at %p:%lu will be overwritten\n",
1247 fprintf(stderr,
"Origin and destination when copying genotype do not have the same number of custom"
1248 " labels (n_labels). The genotype now at %p:%lu will have lost its label data\n",
1251 fprintf(stderr,
"Label defaults must be supplied to gsc_move_genotypes or there is risk of "
1252 "corrupted label values in further use of the simulation");
1390 if (previous != NULL) {
1391 previous->next = NULL;
1502 if (firstAM->
next == NULL) {
1506 firstAM = firstAM->
next;
1525 firstAM = firstAM->
next;
1526 if (firstAM == NULL) {
1555 .cacheSize = cacheSize,
1570 unsigned int currentIndex = 0;
1572 while (currentIndex < n) {
1573 if (am->
next == NULL) {
1596 unsigned int firstAMIndex = 0;
1605 if (firstAM->
next == NULL) {
1609 firstAM = firstAM->
next;
1633 firstAM = firstAM->
next;
1635 if (firstAM == NULL) {
1674 unsigned int lastAMIndex = 0;
1683 lastAM = lastAM->
next;
1696 lastAM = lastAM->
next;
1788 nextAM = nextAM->
next;
1790 }
while (nextAM != NULL && nextAM->
n_genotypes == 0);
1792 if (nextAM == NULL) {
1827 nextAM = nextAM->
next;
1829 }
while (nextAM != NULL && nextAM->
n_genotypes == 0);
1831 if (nextAM == NULL) {
1908 }
while (nextAM != NULL && nextAM->
n_genotypes == 0);
1910 if (nextAM == NULL) {
1952 }
while (nextAM != NULL && nextAM->
n_genotypes == 0);
1954 if (nextAM == NULL) {
2013 if (n < it->cacheSize) {
2028 if (expectedLocation.
localAM == NULL ||
2032 return expectedLocation;
2051 for (; localPos < currentAM->
n_genotypes; ++localPos) {
2062 newCacheSize = newCacheSize << 1;
2072 it->
cache = newCache;
2079 .localPos = localPos
2082 return it->
cache[n];
2092 currentAM = currentAM->
next;
2123 fprintf(stderr,
"Invalid ID %lu\n", (
long unsigned int)
id.
id);
2126 if (start == NULL) {
2127 fprintf(stderr,
"Invalid nonexistent allelematrix\n"); exit(1);
2142 if (m->
next == NULL) {
2143 fprintf(stderr,
"Could not find the ID %lu: did you prematurely delete this genotype?\n", (
long unsigned int)
id.
id);
2151 return m->
names[index];
2155 if (m->
next == NULL) {
2156 fprintf(stderr,
"Could not find the ID %lu: did you prematurely delete this genotype?\n", (
long unsigned int)
id.
id);
2193 if (start == NULL) {
2194 fprintf(stderr,
"Invalid nonexistent allelematrix\n"); exit(1);
2224 if (m->
next == NULL) {
2225 fprintf(stderr,
"Unable to locate ID %lu in simulation memory (genotype has likely been deleted): pedigree past this point cannot be determined\n", (
long unsigned int)
id.
id);
2249 const size_t n_names,
2252 if (start == NULL || (start->
n_genotypes <= 0 && start->
next == NULL)) {
2253 fprintf(stderr,
"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2257 fprintf(stderr,
"Invalid n_names parameter: Search list length must be positive\n");
2264 for (
size_t i = 0; i < n_names; ++i) {
2271 if (strcmp(m->
names[j], names[i]) == 0) {
2273 output[i] = m->
ids[j];
2281 if ((m = m->
next) == NULL) {
2282 fprintf(stderr,
"Didn't find the name %s\n", names[i]);
2305 if (start == NULL || (start->
n_genotypes <= 0 && start->
next == NULL)) {
2306 fprintf(stderr,
"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2321 if ((m = m->
next) == NULL) {
2322 fprintf(stderr,
"Didn't find the child of %lu & %lu\n",
2323 (
long unsigned int)parent1id.
id, (
long unsigned int)parent2id.
id);
2346 if (start == NULL || (start->
n_genotypes <= 0 && start->
next == NULL)) {
2347 fprintf(stderr,
"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2356 if (m->
names[j] != NULL && strcmp(m->
names[j], name) == 0) {
2361 if ((m = m->
next) == NULL) {
2362 fprintf(stderr,
"Didn't find the name %s\n", name);
2381 if (start == NULL) {
2382 fprintf(stderr,
"Invalid start parameter: gsc_AlleleMatrix* `start` must exist\n");
2389 if (total_j == index) {
2391 }
else if (total_j < index && total_j + m->n_genotypes > index) {
2392 return m->
ids[index - total_j];
2396 if ((m = m->
next) == NULL) {
2397 fprintf(stderr,
"Didn't find the index %lu\n", (
long unsigned int) index);
2420 if (start == NULL) {
2421 fprintf(stderr,
"Invalid nonexistent allelematrix\n");
2428 if (total_j == index) {
2430 }
else if (total_j < index && total_j + m->n_genotypes > index) {
2431 return m->
alleles[index - total_j];
2435 if ((m = m->
next) == NULL) {
2436 fprintf(stderr,
"Didn't find the index %lu\n", (
long unsigned int) index);
2460 const size_t list_len,
2467 for (; i < list_len; ++i) {
2473 outGroup = candidate;
2478 int remaininglistlen = list_len - i;
2479 if (remaininglistlen < 2) {
2481 }
else if (remaininglistlen == 2) {
2482 if (grouplist[i].num == grouplist[i+1].num) {
2502 memset(isDuplicate, 0,
sizeof(_Bool)*remaininglistlen);
2503 for (
size_t ii = i; ii < list_len; ++ii) {
2504 for (
size_t jj = ii+1; jj < list_len; ++jj) {
2505 if (grouplist[ii].num == grouplist[jj].num) {
2506 isDuplicate[jj-i] = 1;
2512 memset(anyFound, 0,
sizeof(_Bool)*remaininglistlen);
2522 for (
size_t k = i+1; k < list_len; ++k) {
2525 cachedgroup = grouplist[k];
2535 size_t groupsgone = 0;
2536 for (
size_t j = 0; j < remaininglistlen; ++j) {
2537 if (!isDuplicate[j] && anyFound[j]) {
2566 const size_t index_list_len,
2568 if (index_list_len < 1) {
2569 fprintf(stderr,
"Invalid index_list_len value: length of allocation list must be at least 1\n");
2575 size_t invalidLocations = 0;
2576 for (
size_t i = 0; i < index_list_len; ++i) {
2585 if (invalidLocations > 0) {
2586 fprintf(stderr,
"%lu indexes were invalid\n",(
long unsigned int)invalidLocations);
2588 if (invalidLocations < index_list_len) {
2620 const int valueToSplit) {
2623 fprintf(stderr,
"Nonexistent label %lu\n", (
long unsigned int)whichLabel.
id);
2676 const int valueLowBound,
2677 const int valueHighBound) {
2680 fprintf(stderr,
"Nonexistent label %lu\n", (
long unsigned int)whichLabel.
id);
2683 if (valueLowBound > valueHighBound) {
2684 fprintf(stderr,
"Empty range %d to %d: no group created\n", valueLowBound, valueHighBound);
2735 void* somequality_data,
2741 size_t maxentries_results,
2748 size_t bookmark = 0;
2754 size_t splitgroupsize = 0;
2755 for (
size_t i = 0; i < n_groups; ++i) {
2756 if (currentgroups[i].num == group_id.
num) {
2757 splitgroupsize = currentsizes[i];
2762 if (splitgroupsize == 0) {
2767 size_t subgroupsfound = 0;
2774 gsc_GroupNum assignedgroup = somequality_tester(loc, somequality_data,
2775 splitgroupsize, subgroupsfound, outgroups);
2779 assignedgroup = nextgroup;
2780 outgroups[subgroupsfound] = nextgroup;
2792 if (maxentries_results < subgroupsfound) {
2793 memcpy(results,outgroups,
sizeof(
gsc_GroupNum)*maxentries_results);
2794 fprintf(stderr,
"Output vector size is not large enough to hold all created groups: "
2795 " output list of gsc_GroupNums has been truncated\n");
2797 memcpy(results,outgroups,
sizeof(
gsc_GroupNum)*subgroupsfound);
2800 return subgroupsfound;
2811 for (
size_t j = 0; j < groupsfound; ++j) {
2812 if (getparent(loc).id == familyidentities[j].
id) {
2817 if (groupsfound > maxgroups) {
2818 fprintf(stderr,
"Attempted to split into more groups than caller deemed possible. "
2819 "There is a bug in the simulation tool if you can reach this state.");
2820 return results[maxgroups-1];
2823 familyidentities[groupsfound] = getparent(loc);
2881 size_t maxentries_results,
2883 if (!(parent == 1 || parent == 2)) {
2884 fprintf(stderr,
"Value error: `parent` must be 1 or 2.");
2897 maxentries_results, results);
2901 maxentries_results, results);
2915 for (
size_t j = 0; j < groupsfound; ++j) {
2922 if (groupsfound > maxgroups) {
2923 fprintf(stderr,
"Attempted to split into more groups than caller deemed possible. "
2924 "There is a bug in the simulation tool if you can reach this state.");
2925 return results[maxgroups-1];
2962 size_t maxentries_results,
2966 if (maxgroups < 2) {
2972 familyidentities[0] = p1identity;
2973 familyidentities[1] = p2identity;
2977 maxentries_results, results);
3019 size_t maxentries_results,
3025 maxentries_results, results);
3057 fprintf(stderr,
"Group %lu does not exist\n", (
long unsigned int) group_id.
num);
3059 fprintf(stderr,
"Group %lu has only one member so can't be split\n", (
long unsigned int) group_id.
num);
3108 void* someallocator_data,
3121 size_t subgroupsfound = 0;
3127 gsc_GroupNum assignedgroup = someallocator(loc, d, someallocator_data,
3128 n_outgroups, &subgroupsfound, outgroups);
3132 allocationfailures++;
3138 if (subgroupsfound > 1) {
3141 if (allocationfailures > 0) {
3142 fprintf(stderr,
"While splitting group %lu, %lu allocations to new groups failed so they remain"
3143 " in the original group\n",
3144 (
long unsigned int) group_id.
num, (
long unsigned int) allocationfailures);
3146 return subgroupsfound;
3155 size_t* subgroupsfound,
3158 *subgroupsfound = n_outgroups;
3160 int randpos = rnd_pcg_range(&d->
rng,0,cumulative_counts[n_outgroups-1] - 1);
3164 for (; j < n_outgroups; ++j) {
3165 if (randpos < cumulative_counts[j]) {
3166 chosengroup = outgroups[j];
3170 for (; j < n_outgroups; ++j) {
3171 cumulative_counts[j]--;
3202 fprintf(stderr,
"Invalid n value: number of fractions into which to split group must be at least 2\n");
3212 for (
size_t i = 0; i < n; ++i) {
3213 boxes[i] = each_size;
3218 boxes[i] += boxes[i-1];
3223 if (results == NULL) {
3280 fprintf(stderr,
"Invalid n value: number of fractions into which to split group must be at least 2\n");
3287 for (
size_t j = 0; j < n - 1; ++j) {
3289 cumulative_counts[j] = sum;
3291 if (cumulative_counts[n-2] > cumulative_counts[n-1]) {
3292 fprintf(stderr,
"Provided capacities are larger than actual group: some buckets will not be filled\n");
3296 if (results == NULL) {
3342 if (rnd_pcg_range(&d->
rng,0,1)) {
3362 size_t* subgroupsfound,
3365 size_t randgroup = rnd_pcg_range(&d->
rng,0,n_outgroups-1);
3366 if (randgroup < *subgroupsfound) {
3367 return outgroups[randgroup];
3369 (*subgroupsfound)++;
3370 return outgroups[*subgroupsfound-1];
3403 fprintf(stderr,
"Invalid n value: number of fractions in which to split group must be at least 2\n");
3408 if (results == NULL) {
3427 size_t* subgroupsfound,
3429 double* cumulative_probs = (
double*) datastore;
3430 *subgroupsfound = n_outgroups;
3431 double randdraw = rnd_pcg_nextf(&d->
rng);
3432 for (
size_t j = 0; j < n_outgroups; ++j) {
3433 if (randdraw < cumulative_probs[j]) {
3434 return outgroups[j];
3476 const double* probs,
3479 fprintf(stderr,
"Invalid n value: number of fractions in which to split group must be at least 2\n");
3485 cumulative_probs[n-1] = 1.0;
3487 for (
size_t j = 0; j < n-1; ++j) {
3489 cumulative_probs[j] = sum;
3490 if (cumulative_probs[j] >= 1) {
3491 fprintf(stderr,
"Provided probabilities add up to 1 or more: some buckets will not be filled\n");
3492 for (; j < n-1; ++j) {
3493 cumulative_probs[j] = 1;
3501 if (results == NULL) {
3562 size_t filledbuckets = 0;
3569 if (g.
num >= bucketscap) {
3570 size_t oldcap = bucketscap;
3571 size_t newbucketcapacity = bucketscap;
3572 while (g.
num >= newbucketcapacity) {
3573 newbucketcapacity *= 2;
3576 if (g.
num >= bucketscap) {
3577 fprintf(stderr,
"Memory allocation failed. Not all groups found\n");
3580 memset(buckets+oldcap,0,
sizeof(
GSC_GLOBALX_T)*(bucketscap-oldcap));
3584 buckets[g.
num] += 1;
3585 if (buckets[g.
num] == 1) {
3593 size_t capacity = filledbuckets;
3595 fprintf(stderr,
"Found more groups than expected - gsc_SimData.n_groups is outdated somewhere."
3596 " Trimming output of get_existing_group_ to avoid a crash: not all groups may be shown\n");
3600 for (
size_t i = 1; i < bucketscap; ++i) {
3608 if (out_groups != NULL) {
3611 if (out_sizes != NULL) {
3612 out_sizes[g_index] = buckets[i];
3663 nextgroup.
num = existing_groups[(*cursor) - 1].
num + 1;
3666 while (*
cursor < n_existing_groups) {
3703 while (i < n_groups) {
3704 if (gn < existing_groups[i].num) {
3732 size_t existingi = 0;
3739 for (
size_t i = 0; i < n; ++i) {
3741 while (existingi < n_groups) {
3742 if (gn < existing_groups[existingi].num) {
3766 while (i < d->n_labels) {
3767 if (
new.id < d->label_ids[i].
id) {
3790 while (i < d->n_eff_sets) {
3791 if (
new.id < d->eff_set_ids[i].
id) {
3814 while (i < d->genome.n_maps) {
3815 if (
new.id < d->genome.map_ids[i].id) {
3844 while (first <= last) {
3845 mid = (first + last) / 2;
3876 while (first <= last) {
3877 mid = (first + last) / 2;
3900 if (d->genome.n_maps == 0) {
return GSC_NA_IDX; }
3901 if (d->genome.n_maps == 1) {
return (d->genome.map_ids[0].id == map.
id) ? 0 :
GSC_NA_IDX ; }
3908 while (first <= last) {
3909 mid = (first + last) / 2;
3911 if (d->genome.map_ids[mid].id == map.
id) {
3913 }
else if (d->genome.map_ids[mid].id < map.
id) {
3953 if (m->
next == NULL) {
3987 output[outix] = m->
alleles[i];
3989 if (outix == group_size) {
3995 if (m->
next == NULL) {
4028 output[outix] = m->
names[i];
4030 if (outix == group_size) {
4036 if (m->
next == NULL) {
4069 output[outix] = m->
ids[i];
4071 if (outix == group_size) {
4077 if (m->
next == NULL) {
4110 output[outix] = total_i;
4112 if (outix == group_size) {
4118 if (m->
next == NULL) {
4150 for (
size_t i = 0; i < dm_bvs.
cols; ++i) {
4151 output[i] = dm_bvs.
matrix[0][i];
4152 if (i + 1 == group_size) {
4184 const int whichParent,
4186 if (!(whichParent == 1 || whichParent == 2)) {
4187 fprintf(stderr,
"Value error: `parent` must be 1 or 2.");
4190 int parent = whichParent - 1;
4197 output[outix] = m->
pedigrees[parent][i];
4199 if (outix == group_size) {
4205 if (m->
next == NULL) {
4235 const int whichParent,
4237 if (!(whichParent == 1 || whichParent == 2)) {
4238 fprintf(stderr,
"Value error: `parent` must be 1 or 2.");
4241 int parent = whichParent - 1;
4251 output[outix] = NULL;
4254 if (outix == group_size) {
4260 if (m->
next == NULL) {
4301 char* fname =
"gS_gpptmp";
4305 if ((fp2 = fopen(fname,
"r")) == NULL) {
4306 fprintf(stderr,
"Failed to use temporary file\n");
4313 if (group_size == 0) {
return 0; }
4337 while ((nextc = fgetc(fp2)) !=
'\n' && nextc != EOF) {
4338 output[i][index] = nextc;
4341 if (index >= size) {
4343 char* temp = realloc(output[i],
sizeof(
char) * size);
4346 fprintf(stderr,
"Memory allocation of size %u failed.\n", size);
4353 output[i][index] =
'\0';
4377 for (
size_t i = 0; i < r; ++i) {
4379 for (
size_t j = 0; j < c; ++j) {
4380 zeros.
matrix[i][j] = 0.0;
4410 fprintf(stderr,
"Dimensions invalid for adding to result: %lu does not fit in %lu\n",
4411 (
long unsigned int) a->
rows, (
long unsigned int) result->
cols);
4417 for (
size_t i = 0; i < result->
cols; ++i) {
4419 for (
size_t j = 0; j < a->
cols; ++j) {
4421 cell += (a->
matrix[i][j]) * b[j];
4424 result->
matrix[0][i] += cell;
4458 const double* bvec) {
4461 fprintf(stderr,
"Dimensions invalid for adding to result: %lu does not fit in %lu\n",
4462 (
long unsigned int) amat->
rows, (
long unsigned int) result->
cols);
4466 fprintf(stderr,
"Dimensions invalid for adding to result: %lu does not fit in %lu\n",
4467 (
long unsigned int) bmat->
rows, (
long unsigned int) result->
cols);
4471 fprintf(stderr,
"Dimensions of the two products are uneven: length %lu does not match length %lu\n",
4472 (
long unsigned int) amat->
cols, (
long unsigned int) bmat->
cols);
4478 for (
size_t i = 0; i < result->
cols; ++i) {
4480 for (
size_t j = 0; j < amat->
cols; ++j) {
4482 cell += (amat->
matrix[i][j]) * avec[j];
4483 cell += (bmat->
matrix[i][j]) * bvec[j];
4486 result->
matrix[0][i] += cell;
4500 for (
size_t i = 0; i < m->
rows; i++) {
4501 if (m->
matrix[i] != NULL) {
4536 if (m->
names[i] != NULL) {
4552 total_deleted += deleted;
4554 if (m->
next == NULL) {
4556 printf(
"%lu genotypes were deleted\n", (
long unsigned int) total_deleted);
4575 fprintf(stderr,
"Nonexistent effect set %lu\n", (
long unsigned int) effID.
id);
4594 d->
e[i] = d->
e[i+1];
4598 memcpy(newE, d->
e,
sizeof(*d->
e)*which_ix);
4599 memcpy(newE + which_ix, d->
e + which_ix + 1,
sizeof(*d->
e)*(d->
n_eff_sets - which_ix));
4605 if (newIDs == NULL) {
4630 fprintf(stderr,
"Nonexistent label %lu\n", (
long unsigned int)which_label.
id);
4649 }
while ((m = m->
next) != NULL);
4654 if (new_label_ids == NULL) {
4667 if (new_label_defaults == NULL) {
4687 if (new_label_lookups == NULL) {
4693 memcpy(new_label_lookups, m->
labels,
sizeof(*m->
labels)*label_ix);
4694 memcpy(new_label_lookups + label_ix, m->
labels + label_ix + 1,
sizeof(*m->
labels)*(m->
n_labels - label_ix));
4696 m->
labels = new_label_lookups;
4698 }
while ((m = m->
next) != NULL);
4729 if (g->
maps != NULL) {
4749 fprintf(stderr,
"Nonexistent recombination map %lu\n", (
long unsigned int) which_map.
id);
4764 if (tmplist == NULL) {
4778 if (tmpids == NULL) {
4802 if (m->
chrs != NULL) {
4805 case GSC_LINKAGEGROUP_SIMPLE:
4814 case GSC_LINKAGEGROUP_REORDER:
4857 if (m->
names[i] != NULL) {
4864 if (m->
labels[i] != NULL) {
4874 }
while ((m = next) != NULL);
5006 if ((fp = fopen(filename,
"r")) == NULL) {
5007 fprintf(stderr,
"Failed to open file %s.\n", filename);
5025 if (tbl->
fp != NULL) { fclose(tbl->
fp); }
5038 if (tbl->
fp != NULL) {
5096 .predCol = 0, .predNewline = 0, .eof =
GSC_FALSE };
5100 size_t tblbuf_offset = 0;
5101 size_t tblbuf_len = 0;
5102 int predCarriageReturn = 0;
5107 if (0 < predCarriageReturn) { --predCarriageReturn; }
5113 predCarriageReturn = 2;
5115 if (!(predCarriageReturn && tbl->
buf[tbl->
cursor] ==
'\n')) {
5130 if (0 < predCarriageReturn) { ++predCarriageReturn; }
5134 tblbuf_offset = tbl->
cursor; tblbuf_len = 1;
5157 if (!warned && tblbuf_len > 8192) {
5159 fprintf(stderr,
"Warning: very long cell identified beginning %c%c%c%c%c%c. Column separators may have failed to be recognised\n",
5160 tmpcell[0],tmpcell[1],tmpcell[2],tmpcell[3],tmpcell[4],tmpcell[5]);
5164 memcpy(tmpcell+tmpix,tbl->
buf+tblbuf_offset,
sizeof(
char)*tblbuf_len);
5165 tmpix += tblbuf_len;
5166 tmpcell[tmpix] =
'\0';
5168 tblbuf_offset = 0; tblbuf_len = 0;
5179 memcpy(cur.
cell,tmpcell,
sizeof(
char)*tmpix);
5180 if (0 < tblbuf_len) {
5181 memcpy(cur.
cell+tmpix,tbl->
buf+tblbuf_offset,
sizeof(
char)*tblbuf_len);
5207 while (comparison != 0 && first <= last) {
5208 if (comparison == 0) {
5211 }
else if (comparison > 0) {
5215 if (index == 0) {
return 0; }
5220 index = (first + last) / 2;
5242 size_t* queuesize) {
5244 if (*queuesize > 0) {
5286 const char** canonical_titles,
5289 size_t* queuesize) {
5290 const int ncells = 3;
5295 for (; newest < ncells; ++newest) {
5297 if ((unprocessedqueue[newest]).eof) {
5298 if (newest + 1 < ncells) {
5299 if (!((unprocessedqueue[newest]).isCellShallow)) {
GSC_FREE((unprocessedqueue[newest]).cell); }
5300 *queuesize = newest;
5304 *queuesize = newest + 1;
5306 }
else if ((unprocessedqueue[newest]).predNewline) {
5307 *queuesize = newest+1;
5314 *queuesize = newest + 1;
5318 for (
int i1 = 0; i1 < ncells; ++i1) {
5319 if (strcmp((unprocessedqueue[i1]).cell,canonical_titles[0]) == 0) {
5320 for (
int inc = 1; inc < ncells; ++inc) {
5321 int i2 = (i1 + inc) % ncells;
5322 int i3 = (i1 + (ncells - inc)) % ncells;
5323 if (strcmp((unprocessedqueue[i2]).cell,canonical_titles[1]) == 0 &&
5324 strcmp((unprocessedqueue[i3]).cell,canonical_titles[2]) == 0) {
5325 col_order[0] = i1 + 1;
5326 col_order[1] = i2 + 1;
5327 col_order[2] = i3 + 1;
5328 GSC_FREE((unprocessedqueue[0]).cell);
5329 GSC_FREE((unprocessedqueue[1]).cell);
5330 GSC_FREE((unprocessedqueue[2]).cell);
5331 unprocessedqueue[0] = unprocessedqueue[3];
5376 if (filename == NULL)
return 0;
5386 const char* titles[] = {
"marker",
"chr",
"pos"};
5387 int colnums[] = { 1, 2, 3 };
5390 printf(
"(Loading %s) Format: map file with header\n", filename);
5392 printf(
"(Loading %s) Format: map file without header\n", filename);
5394 printf(
"(Loading %s) Failure: Cannot identify the expected 3 columns of the map file\n", filename);
5398 int marker_colnum = colnums[0], chr_colnum = colnums[1], pos_colnum = colnums[2];
5400 _Bool goodrow = (header) ? 0 : 1;
5401 size_t goodrow_counter = 0;
5403 char* marker = NULL;
5404 unsigned long chr = 0;
5406 char* conversionflag;
5415 if (ncell.
cell != NULL) {
5418 buffer[goodrow_counter].name = marker;
5419 buffer[goodrow_counter].chr = chr;
5420 buffer[goodrow_counter].pos = pos;
5423 if (goodrow_counter >= buffercap) {
5427 }
else if (marker != NULL) {
5434 col += (ncell.
predCol > 0) ? 1 : 0;
5439 }
if (col == marker_colnum) {
5441 marker = ncell.
cell;
5444 }
else if (col == chr_colnum) {
5446 chr = strtoul(ncell.
cell,&conversionflag,36);
5453 }
else if (col == pos_colnum) {
5455 pos = strtod(ncell.
cell,&conversionflag);
5469 }
while (!ncell.
eof);
5473 buffer[goodrow_counter].name = marker;
5474 buffer[goodrow_counter].chr = chr;
5475 buffer[goodrow_counter].pos = pos;
5479 }
else if (marker != NULL) {
5485 printf(
"(Loading %s) %u marker(s) with map positions were loaded. Failed to parse %u line(s).\n", filename, (
unsigned int) goodrow_counter, (
unsigned int) (row - header - goodrow_counter));
5490 return goodrow_counter;
5520 if (rlist[i].
name != NULL) {
5523 if (n_joined != i) {
5524 rlist[n_joined] = rlist[i];
5590 d->
e[neweffsetindex] = effset;
5602 if (n_markers < 2) {
return; }
5610 unsigned long current_chr = markerlist[0].
chr;
5613 if (markerlist[i].
chr != current_chr) {
5615 qsort(markerlist + chr_start, i - chr_start,
5619 current_chr = markerlist[i].
chr;
5623 qsort(markerlist + chr_start, n_markers - chr_start,
5644 if (n_markers == 0)
return NO_MAP;
5647 memset(chr_nmembers,0,
sizeof(*chr_nmembers)*40);
5648 chr_nmembers[0] = 1;
5650 unsigned long current_chr = markerlist[0].
chr;
5652 while (i < n_markers && markerlist[i].
name == NULL) {
5655 if (current_chr != markerlist[i].
chr) {
5657 if (n_chr >= chr_nmemberscap) {
5659 memset(chr_nmembers+n_chr,0,
sizeof(*chr_nmembers)*n_chr);
5662 current_chr = markerlist[i].
chr;
5663 chr_nmembers[n_chr-1] = 1;
5665 ++(chr_nmembers[n_chr-1]);
5677 first_marker = current_marker;
5678 double chrdist = markerlist[first_marker + chr_nmembers[chr_ix] - 1].
pos - markerlist[first_marker].
pos;
5690 for (; current_marker < endpt; ++current_marker) {
5691 if (markerlist[current_marker].
name == NULL) {
5701 first_marker = current_marker;
5702 firsts_coord_in_genome = coord;
5703 lgdists[n_goodmembers] = (markerlist[current_marker].
pos - markerlist[first_marker].
pos) / chrdist;
5706 }
else if (firsts_coord_in_genome + n_goodmembers < d->genome.n_markers &&
5707 strcmp(markerlist[current_marker].
name, d->
genome.
marker_names[firsts_coord_in_genome + n_goodmembers]) == 0) {
5709 lgdists[n_goodmembers] = (markerlist[current_marker].
pos - markerlist[first_marker].
pos) / chrdist;
5714 for (
GSC_GENOLEN_T backfill = 0; backfill < n_goodmembers; ++backfill) {
5715 marker_coords[backfill] = firsts_coord_in_genome + backfill;
5721 for (; current_marker < endpt; ++current_marker) {
5722 if (markerlist[current_marker].
name == NULL) {
5730 marker_coords[n_goodmembers] = coord;
5731 lgdists[n_goodmembers] = (markerlist[current_marker].
pos - markerlist[first_marker].
pos) / chrdist;
5736 if (n_goodmembers == 0) {
5738 }
else if (marker_coords == NULL) {
5740 map.
chrs[chr_ix_actual].
type = GSC_LINKAGEGROUP_SIMPLE;
5747 map.
chrs[chr_ix_actual].
type = GSC_LINKAGEGROUP_REORDER;
5756 if (map.
n_chr == 0) {
5782 double expected_n_recombinations) {
5785 if (markernames == NULL) {
5792 map.
chrs[0].
type = GSC_LINKAGEGROUP_SIMPLE;
5798 if (n_markers == 0)
return NO_MAP;
5801 _Bool found_first = 0;
5808 if (!found_first || marker_coords != NULL) {
5812 if (markernames[i] == NULL) {
5816 }
else if (!found_first) {
5818 firsts_coord_in_genome = coord;
5821 marker_coords[chrmarker_ix] = coord;
5825 }
else if (firsts_coord_in_genome < d->genome.n_markers &&
5833 for (
GSC_GENOLEN_T backfill = 0; backfill < chrmarker_ix; ++backfill) {
5834 marker_coords[backfill] = firsts_coord_in_genome + backfill;
5837 if (markernames[i] == NULL) {
5848 double lgdist = 1./n_markers;
5849 for (
GSC_GENOLEN_T i = 0; i < chrmarker_ix; ++i) { lgdists[i] = lgdist; }
5851 if (marker_coords == NULL) {
5852 map.
chrs[0].
type = GSC_LINKAGEGROUP_SIMPLE;
5858 map.
chrs[0].
type = GSC_LINKAGEGROUP_REORDER;
5907 if (filename == NULL)
return NO_MAP;
5911 if (nrows == 0 || mapcontents == NULL) {
5912 if (mapcontents != NULL) {
5918 _Bool freeMapNames = 1;
5919 if (d->genome.n_markers > 0) {
5922 if (new_nrows < nrows) {
5923 printf(
"Discarded %lu markers when loading map %s because they do not appear in the primary map.\n", (
long unsigned int) (nrows - new_nrows), filename);
5939 d->genome.marker_names[i] = mapcontents[i].
name;
5940 d->genome.names_alphabetical[i] = &(d->genome.marker_names[i]);
5950 for (
size_t i = 0; i < nrows; ++i) {
6006 const char* titles[] = {
"marker",
"allele",
"eff"};
6007 int colnums[] = { 1, 2, 3 };
6011 printf(
"(Loading %s) Format: effect file with header\n", filename);
6013 printf(
"(Loading %s) Format: effect file without header\n", filename);
6015 printf(
"(Loading %s) Failure: Cannot identify the expected 3 columns of the effect file\n", filename);
6019 int marker_colnum = colnums[0], allele_colnum = colnums[1], eff_colnum = colnums[2];
6022 _Bool goodrow = (header) ? 0 : 1;
6023 size_t goodrow_counter = 0;
6032 GSC_ID_T alleleix = n_effset_rows + 1;
6034 char* conversionflag;
6041 if (ncell.
cell != NULL) {
6044 if (goodrow && col >= 3) {
6045 if (alleleix == n_effset_rows) {
6048 if (effset_allelescap < n_effset_rows) {
6052 effset_alleles[alleleix] = allele;
6056 if (alleleix < n_effset_rows) {
6057 effset_rows[alleleix][markerix] = effect;
6065 col += (ncell.
predCol > 0) ? 1 : 0;
6070 }
else if (col == marker_colnum) {
6079 }
else if (col == allele_colnum) {
6084 allele = ncell.
cell[0];
6085 for (alleleix = 0; alleleix < n_effset_rows; ++alleleix) {
6086 if (effset_alleles[alleleix] == allele) {
6091 }
else if (col == eff_colnum) {
6093 effect = strtod(ncell.
cell,&conversionflag);
6107 }
while (!ncell.
eof);
6109 if (col == 3 && goodrow) {
6111 if (alleleix == n_effset_rows) {
6114 if (effset_allelescap < n_effset_rows) {
6118 effset_alleles[alleleix] = allele;
6122 effset_rows[alleleix][markerix] = effect;
6125 printf(
"(Loading %s) %lu effect value(s) spanning %lu allele(s) were loaded. Failed to parse %lu line(s).\n",
6126 filename, (
long unsigned int) goodrow_counter, (
long unsigned int) allele_counter, (
long unsigned int) (row - header - goodrow_counter));
6129 if (n_effset_rows > 0) {
6152 switch (c.
cell[0]) {
6174 if (c.
cell[0] ==
'm') {
6175 switch (c.
cell[1]) {
6193 if (c.
cell[1] ==
'/') {
6213 char*
pos = loc.localAM->alleles[loc.localPos] + 2*markerix;
6231 phase = rnd_pcg_range(&forrng->
rng,0,1);
6260 phase = rnd_pcg_range(&forrng->
rng,0,1);
6265 phase = rnd_pcg_range(&forrng->
rng,0,1);
6270 phase = rnd_pcg_range(&forrng->
rng,0,1);
6275 phase = rnd_pcg_range(&forrng->
rng,0,1);
6280 phase = rnd_pcg_range(&forrng->
rng,0,1);
6285 phase = rnd_pcg_range(&forrng->
rng,0,1);
6304 .alloctogroup = allocation_group,
6355 fprintf(stderr,
"EmptyListNavigator invalid\n");
6376 if (NULL == it->
d->
m) {
6380 while (NULL != listend->
next) {
6381 listend = listend->
next;
6418 const size_t firstrowlen,
6419 const size_t queuelen,
6421 const char* filenameforlog) {
6423 if (format.markers_as_rows ==
GSC_TRUE || format.markers_as_rows ==
GSC_FALSE) {
6425 }
else if (d->genome.n_maps == 0) {
6429 printf(
"(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns| (by assumption when no genetic map is loaded)\n", filenameforlog);
6430 printf(
"(Loading %s) No genetic map is loaded, will invent a map with equal spacing of these genetic markers (1cM apart)\n", filenameforlog);
6433 }
else if (format.has_header ==
GSC_FALSE) {
6434 printf(
"(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns| "
6435 "(by assumption when matrix has no header row)\n", filenameforlog);
6441 int firstsafeheaderindex = -1;
6442 if (firstrowlen > 1) {
6443 firstsafeheaderindex = 1;
6444 }
else if (firstrowlen == 1 && queuelen > firstrowlen + 1) {
6445 firstsafeheaderindex = 0;
6449 if (firstsafeheaderindex >= 0) {
6453 printf(
"(Loading %s) Format axis: genetic markers are |columns|, founder lines are -rows-\n", filenameforlog);
6460 if (queuelen > firstrowlen && !cellqueue[firstrowlen].eof &&
6462 printf(
"(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns|\n", filenameforlog);
6468 for (
size_t i = firstsafeheaderindex + 1; i < firstrowlen; ++i) {
6470 printf(
"(Loading %s) Format axis: genetic markers are |columns|, founder lines are -rows-\n", filenameforlog);
6478 printf(
"(Loading %s) Format axis: genetic markers are -rows-, founder lines are |columns| (by default file format)\n", filenameforlog);
6498 const size_t firstrowlen,
6499 const size_t queuelen,
6501 const char* filenameforlog) {
6503 _Bool style_detected = 0;
6504 _Bool single_col_file = 0;
6509 if (firstrowlen == queuelen || cellqueue[firstrowlen].eof) {
6511 if (firstrowlen > 1) {
6515 single_col_file = 1;
6519 if (firstrowlen + 1 < queuelen && cellqueue[firstrowlen+1].predNewline < 1) {
6523 single_col_file = 1;
6529 if (style_detected) {
6530 switch(format.cell_style) {
6533 case GSC_GENOTYPECELLSTYLE_COUNT: printf(
"(Loading %s) Allele format: reference allele counts (phase will be randomised)\n", filenameforlog);
break;
6534 case GSC_GENOTYPECELLSTYLE_ENCODED: printf(
"(Loading %s) Allele format: IUPAC encoded pair (phase will be randomised)\n", filenameforlog);
break;
6536 if (single_col_file || firstrowlen == queuelen ||
6537 (firstrowlen + 1 == queuelen && cellqueue[firstrowlen].eof && cellqueue[firstrowlen].cell_len == 0)) {
6538 printf(
"(Loading %s) Warning: empty genotype matrix. No genotypes will be loaded.\n", filenameforlog);
6540 fprintf(stderr,
"(Loading %s) Failure: Unable to determine the formatting of pairs of alleles."
6541 " Check genomicSimulation manual for accepted allele pair encodings\n", filenameforlog);
6578 const size_t firstrowlen,
6579 const size_t queuelen,
6581 const char* filenameforlog) {
6584 printf(
"(Loading %s) Failure: genetic markers cannot be represented by columns when matrix has no header row\n", filenameforlog);
6585 format.has_header =
GSC_NA;
6591 if (firstrowlen == 1) {
6595 if (cellqueue[2].eof || cellqueue[2].predNewline) {
6605 for (
size_t i = 1; i < firstrowlen; ++i) {
6613 switch (format.has_header) {
6614 case GSC_FALSE: printf(
"(Loading %s) Format: genotype matrix without header row\n", filenameforlog);
break;
6615 case GSC_TRUE: printf(
"(Loading %s) Format: genotype matrix with header row\n", filenameforlog);
break;
6616 default: fprintf(stderr,
"(Loading %s) Failure: Unable to determine whether file has header row\n", filenameforlog);
break;
6639 const size_t ncellsfirstrow,
6640 const size_t ncellssecondrow,
6641 const _Bool secondrowheaderisempty) {
6642 if (ncellssecondrow == ncellsfirstrow + 1) {
6644 }
else if (ncellssecondrow == ncellsfirstrow) {
6645 if (secondrowheaderisempty) {
6650 }
else if (ncellssecondrow == ncellsfirstrow - 1 && secondrowheaderisempty) {
6707 const char* filename,
6709 if (filename == NULL)
return NO_GROUP;
6711 fprintf(stderr,
"Non-genotype-matrix format specification provided to genotype matrix file loader function\n");
6721 size_t queuesize = 0;
6726 size_t ncellsread = 0;
6731 if (ncellsread >= cellsreadcap) {
6734 }
while (!cellsread[ncellsread-1].eof && (ncellsread <= 1 || !cellsread[ncellsread-1].predNewline));
6735 size_t ncellsfirstrow = (cellsread[ncellsread-1].eof && cellsread[ncellsread-1].cell_len > 0) ? ncellsread : ncellsread - 1;
6736 if (!cellsread[ncellsread-1].eof) {
6740 if (ncellsread >= cellsreadcap) {
6744 queuesize = ncellsread;
6745 if (ncellsread <= 1) {
6749 int is_onerow_file = ncellsread == ncellsfirstrow || cellsread[ncellsfirstrow].eof;
6764 while (!cellsread[ncellsread-1].eof && !cellsread[ncellsread-1].predNewline) {
6768 if (ncellsread >= cellsreadcap) {
6773 queuesize = ncellsread;
6774 size_t ncellssecondrow = ncellsread - ncellsfirstrow - 1;
6776 if (format_has_corner_cell ==
GSC_NA) {
6777 fprintf(stderr,
"(Loading %s) Failure: Header row length and second row length do not align\n", filename);
6786 cellqueue = cellsread + ncellsfirstrow;
6787 queuesize = ncellsread - ncellsfirstrow;
6791 _Bool build_map_from_rows = 0;
6794 build_map_from_rows = 1;
6802 }
while (!cell.
eof);
6815 fprintf(stderr,
"(Loading %s) Failure: Genotype matrix with markers as columns but no header row is an unsupported file type (there is no way to tell which column is which marker)\n", filename);
6819 size_t i = format_has_corner_cell ? 1 : 0;
6826 cellsread[i].isCellShallow =
GSC_TRUE;
6844 n_cols = (format_detected.
has_header) ? ncellsfirstrow + 1 : ncellsfirstrow;
6852 if (ncell.
cell != NULL) {
6856 if (build_map_from_rows) {
6857 have_valid_marker = 1;
6870 nvalidmarker += have_valid_marker;
6873 if (row == 1 && format_detected.
has_header) {
6874 if (column + 1 != ncellsfirstrow && column + 1 != ncellsfirstrow + 1) {
6875 fprintf(stderr,
"(Loading %s) Failure: Header row length and second row length do not align\n", filename);
6878 n_cols = column + 1;
6887 if (have_valid_marker && column < n_cols) {
6895 }
while (!ncell.
eof);
6896 if (row == 1 && format_detected.
has_header) {
6897 if (column + 1 != ncellsfirstrow && column + 1 != ncellsfirstrow + 1) {
6898 fprintf(stderr,
"(Loading %s) Failure: Header row length and second row length do not align\n", filename);
6901 n_cols = column + 1;
6908 size_t i = format_has_corner_cell ? 1 : 0;
6910 for (
size_t j = 0; i < ncellsfirstrow; ++i, ++j) {
6914 cellsread[i].isCellShallow =
GSC_TRUE;
6919 if (build_map_from_rows) {
6928 fprintf(stderr,
"(Loading %s) Failure: Genotype matrix with markers as columns but no header row is an unsupported file type (there is no way to tell which column is which marker)\n", filename);
6933 size_t i = format_has_corner_cell ? 1 : 0;
6934 size_t n_col = ncellsfirstrow + (1-i);
6950 if (ncell.
cell != NULL) {
6975 }
while (!ncell.
eof);
6986 }
while ((tmpam = tmpam->next) != NULL);
6987 printf(
"(Loading %s) %lu genotype(s) of %lu marker(s) were loaded.\n", filename,
6988 (
long unsigned int) ngenos, (
long unsigned int) nvalidmarker);
6998 for (
size_t j = 0; j < ncellsfirstrow; ++j) {
6999 if (!cellsread[j].isCellShallow) {
GSC_FREE(cellsread[j].cell); }
7009 for (
size_t i = 1; i <= queuesize; ++i) {
7010 if (!cellsread[ncellsread-i].isCellShallow) {
7011 GSC_FREE(cellsread[ncellsread-i].cell);
7012 cellsread[ncellsread-i].isCellShallow =
GSC_TRUE;
7017 for (
size_t j = 0; j < ncellsfirstrow; ++j) {
7018 if (!cellsread[j].isCellShallow) {
GSC_FREE(cellsread[j].cell); }
7044 const char* filename,
7068 const char* genotype_file,
7069 const char* map_file,
7070 const char* effect_file,
7077 char* suffix = strrchr(genotype_file,
'.');
7078 if (suffix != NULL) {
7079 if (strcmp(suffix,
".bed") == 0) {
7081 }
else if (strcmp(suffix,
".ped") == 0) {
7083 }
else if (strcmp(suffix,
".vcf") == 0) {
7094 fprintf(stderr,
"plink .bed file parsing not yet implemented\n");
7097 fprintf(stderr,
"plink .ped file parsing not yet implemented\n");
7100 fprintf(stderr,
"vcf file parsing not yet implemented\n");
7149 unsigned int p2num,
char* offspring,
int certain) {
7151 fprintf(stderr,
"Need at least one recombination map loaded to estimate recombinations\n");
7157 fprintf(stderr,
"We don't have that recombination maps loaded\n");
7164 int p1match, p2match;
7168 for (
int chr = 0; chr <
map.n_chr; ++chr) {
7171 switch (
map.chrs[chr].type) {
7172 case GSC_LINKAGEGROUP_SIMPLE:
7173 for (
int i = 0; i <
map.chrs[chr].map.simple.n_markers; ++i) {
7176 if (p1match && !p2match) {
7177 origins[
map.chrs[chr].map.simple.first_marker_index + i] = p1num;
7179 }
else if (p2match && !p1match) {
7180 origins[
map.chrs[chr].map.simple.first_marker_index + i] = p2num;
7184 origins[
map.chrs[chr].map.simple.first_marker_index + i] = 0;
7186 origins[
map.chrs[chr].map.simple.first_marker_index + i] = previous;
7192 case GSC_LINKAGEGROUP_REORDER:
7193 for (
int i = 0; i <
map.chrs[chr].map.reorder.n_markers; ++i) {
7196 if (p1match && !p2match) {
7197 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = p1num;
7199 }
else if (p2match && !p1match) {
7200 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = p2num;
7204 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7206 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = previous;
7258 unsigned int p2num,
char* offspring,
int window_size,
int certain) {
7260 fprintf(stderr,
"Need at least one recombination map loaded to estimate recombinations\n");
7266 fprintf(stderr,
"We don't have that recombination maps loaded\n");
7274 int p1match, p2match;
7275 int previous = 0, window_range = (window_size - 1)/2, i;
7277 for (
int chr = 0; chr <
map.n_chr; ++chr) {
7280 switch (
map.chrs[chr].type) {
7281 case GSC_LINKAGEGROUP_SIMPLE:
7282 for (i = 0; i < window_range; ++i) {
7283 origins[
map.chrs[chr].map.simple.first_marker_index + i] = 0;
7285 for (; i <
map.chrs[chr].map.simple.n_markers - window_range; ++i) {
7288 if (p1match && !p2match) {
7289 origins[
map.chrs[chr].map.simple.first_marker_index + i] = p1num;
7291 }
else if (p2match && !p1match) {
7292 origins[
map.chrs[chr].map.simple.first_marker_index + i] = p2num;
7296 origins[
map.chrs[chr].map.simple.first_marker_index + i] = 0;
7298 origins[
map.chrs[chr].map.simple.first_marker_index + i] = previous;
7302 for (; i <
map.chrs[chr].map.simple.n_markers; ++i) {
7303 origins[
map.chrs[chr].map.simple.first_marker_index + i] = 0;
7307 case GSC_LINKAGEGROUP_REORDER:
7308 for (i = 0; i < window_range; ++i) {
7309 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7311 for (; i <
map.chrs[chr].map.reorder.n_markers - window_range; ++i) {
7314 if (p1match && !p2match) {
7315 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = p1num;
7317 }
else if (p2match && !p1match) {
7318 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = p2num;
7322 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7324 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = previous;
7328 for (; i <
map.chrs[chr].map.reorder.n_markers; ++i) {
7329 origins[
map.chrs[chr].map.reorder.marker_indexes[i]] = 0;
7375 int window_len,
int certain) {
7379 if ((fp = fopen(input_file,
"r")) == NULL) {
7380 fprintf(stderr,
"Failed to open file %s.\n", input_file); exit(1);
7383 if ((fpo = fopen(output_file,
"w")) == NULL) {
7384 fprintf(stderr,
"Failed to open file %s.\n", output_file); exit(1);
7393 char* combin_genes[3];
7397 for (
int i = 0; i < t.
num_rows; ++i) {
7399 fscanf(fp,
"%s %s %s \n", buffer[0], buffer[1], buffer[2]);
7403 if (combin_i[0] < 0 || combin_i[1] < 0 || combin_i[2] < 0) {
7404 fprintf(stderr,
"Genotypes at file %s line %lu could not be found\n", input_file, (
long unsigned int) i);
7411 if (window_len == 1) {
7420 fprintf(fpo,
"\n%s", buffer[0]);
7422 fprintf(fpo,
"\t%d", r[j]);
7428 fwrite(
"\n",
sizeof(
char), 1, fpo);
7465 const char* parent_genome,
7469 if (parent_genome == NULL) {
7470 fprintf(stderr,
"Could not generate this gamete: no parent provided\n");
7474 fprintf(stderr,
"Could not generate this gamete: invalid map provided\n");
7486 case GSC_LINKAGEGROUP_SIMPLE:
7489 case GSC_LINKAGEGROUP_REORDER:
7493 fprintf(stderr,
"Linkage group type of linkage group with index %lu of map with index %lu is corrupted\n",
7494 (
long unsigned int) chr, (
long unsigned int) map_index);
7499 if (num_crossovers > crossover_wherecap) {
7502 for (
int i = 0; i < num_crossovers; ++i) {
7503 crossover_where[i] = ((double)rand() / (double)RAND_MAX);
7505 if (num_crossovers > 1) {
7510 int which = rnd_pcg_range(&d->
rng,0,1);
7511 int up_to_crossover = 0;
7513 case GSC_LINKAGEGROUP_SIMPLE:
7516 while (up_to_crossover < num_crossovers &&
7525 case GSC_LINKAGEGROUP_REORDER:
7528 while (up_to_crossover < num_crossovers &&
7564 const char* parent_genome,
7572 if (parent_genome == NULL) {
7573 fprintf(stderr,
"Could not make this doubled haploid\n");
7577 fprintf(stderr,
"Could not generate this gamete: invalid map provided\n");
7589 case GSC_LINKAGEGROUP_SIMPLE:
7592 case GSC_LINKAGEGROUP_REORDER:
7596 fprintf(stderr,
"Linkage group type of group with index %lu of map with index %lu is corrupted\n",
7597 (
long unsigned int) chr, (
long unsigned int) map_index);
7602 if (num_crossovers > crossover_wherecap) {
7605 for (
int i = 0; i < num_crossovers; ++i) {
7606 crossover_where[i] = ((double)rand() / (double)RAND_MAX);
7608 if (num_crossovers > 1) {
7613 int which = rnd_pcg_range(&d->
rng,0,1);
7614 int up_to_crossover = 0;
7616 case GSC_LINKAGEGROUP_SIMPLE:
7619 while (up_to_crossover < num_crossovers &&
7625 output[2*pos] = parent_genome[2*pos + which];
7626 output[2*pos + 1] = output[2*pos];
7629 case GSC_LINKAGEGROUP_REORDER:
7632 while (up_to_crossover < num_crossovers &&
7638 output[2*pos] = parent_genome[2*pos + which];
7639 output[2*pos + 1] = output[2*pos];
7662 const char* parent_genome,
7665 output[2*j] = parent_genome[2*j];
7666 output[2*j + 1] = parent_genome[2*j + 1];
7680 strcpy(tmpname_p,
"out");
7682 strcat(tmpname_p,
"-pedigree.txt");
7683 fp = fopen(tmpname_p,
"w");
7704 strcpy(tmpname_b,
"out");
7706 strcat(tmpname_b,
"-bv.txt");
7707 fe = fopen(tmpname_b,
"w");
7722 strcpy(tmpname_g,
"out");
7724 strcat(tmpname_g,
"-genotype.txt");
7725 fg = fopen(tmpname_g,
"w");
7870 void* parentIterator,
7872 int (*parentChooser)(
void*,
7882 parentChooser == NULL || offspringGenerator == NULL) {
7897 while (last->
next != NULL) {
7911 while (parentChooser(parentIterator, datastore, &counter, parents)) {
7925 last->
next = offspring;
7934 offspringGenerator(d, datastore, parents, offspringPos);
7935 offspring->
groups[fullness] = output_group;
7955 last->
next = offspring;
7958 return output_group;
7982 if (*counter < datastore->rand.n_crosses &&
7996 if (datastore->
rand.
cap > 0) {
7997 datastore->
rand.
uses[parentixs[0]] += 1;
7998 datastore->
rand.
uses[parentixs[1]] += 1;
8046 fprintf(stderr,
"Group %lu does not exist\n", (
long unsigned int) from_group.
num);
8050 if (n_crosses < 1) {
8051 fprintf(stderr,
"Invalid n_crosses value provided: n_crosses must be greater than 0\n");
8056 fprintf(stderr,
"Invalid cap value provided: cap can't be negative\n");
8059 if (cap > 0 && cap*g_size < n_crosses) {
8060 fprintf(stderr,
"Invalid cap value provided: cap of %lu uses on %lu parents too small to make %lu crosses\n",
8061 (
long unsigned int) cap, (
long unsigned int) g_size, (
long unsigned int) n_crosses);
8102 }
else if (g_size == 1) {
8103 fprintf(stderr,
"Group %lu must contain multiple individuals to be able to perform random crossing\n",
8104 (
long unsigned int) from_group.
num);
8108 fprintf(stderr,
"Crossing requires at least one recombination map loaded\n");
8128 fprintf(stderr,
"Could not find recombination map with identifier %lu\n", (
long unsigned int) which_map.
id);
8171 if (max < 1 || (max == 1 && noCollision == 0)) {
8174 if (max > INT_MAX) {
8175 fprintf(stderr,
"Drawing a random number with a max of %lu is not supported on the C version"
8176 "with the rnd library. If the max is greater than %d, probabilistic uniformity may be lost"
8177 "or an infinite loop may occur.", (
long unsigned int) max, INT_MAX);
8183 parentix = rnd_pcg_range(&d->
rng,0,max - 1);
8184 }
while (parentix == noCollision || member_uses[parentix] >=
cap);
8187 parentix = rnd_pcg_range(&d->
rng,0,max - 1);
8188 }
while (parentix == noCollision);
8210 size_t parentixs[2] = { 0 };
8212 if (*counter < datastore->
rand_btwn.n_crosses &&
8294 fprintf(stderr,
"Crossing requires at least one recombination map loaded\n");
8325 fprintf(stderr,
"Could not find recombination map with identifier %lu\n", (
long unsigned int) map1.
id);
8332 fprintf(stderr,
"Could not find recombination map with identifier %lu\n", (
long unsigned int) map2.
id);
8367 while (*counter < datastore->
targeted.n_crosses) {
8421 const size_t n_combinations,
8427 if (n_combinations < 1) {
8428 fprintf(stderr,
"Invalid n_combinations value provided: n_combinations must be greater than 0\n");
8432 fprintf(stderr,
"Crossing requires at least one recombination map loaded\n");
8449 fprintf(stderr,
"Could not find recombination map with identifier %lu\n", (
long unsigned int) map1.
id);
8456 fprintf(stderr,
"Could not find recombination map with identifier %lu\n", (
long unsigned int) map2.
id);
8469 fprintf(stderr,
"Targeted crossing failed for %lu out of the %lu requested pairings due to one or both genotype indexes being invalid\n", (
long unsigned int) paramstore.
targeted.
bad_pairings, (
long unsigned int) n_combinations);
8494 parents[1] = parents[0];
8523 int n_oddness = n % 2;
8524 for (
unsigned int i = 0; i < n; ++i) {
8525 if (i % 2 == n_oddness) {
8528 tmpparent = tmpchild;
8562 const unsigned int n,
8572 fprintf(stderr,
"Invalid n value provided: Number of generations must be greater than 0\n");
8576 fprintf(stderr,
"Selfing requires at least one recombination map loaded\n");
8588 fprintf(stderr,
"Could not find recombination map with identifier %lu\n", (
long unsigned int) which_map.
id);
8615 parents[0].mapindex);
8647 fprintf(stderr,
"Crossing requires at least one recombination map loaded\n");
8657 fprintf(stderr,
"Could not find recombination map with identifier %lu\n", (
long unsigned int) which_map.
id);
8687 parents[1] = parents[0];
8790 fprintf(stderr,
"Group %lu does not have enough members to perform crosses\n", (
long unsigned int) from_group.
num);
8792 fprintf(stderr,
"Group %lu does not exist\n", (
long unsigned int) from_group.
num);
8810 combinations[0][cross_index] = group_indexes[i];
8811 combinations[1][cross_index] = group_indexes[j];
8826 fprintf(stderr,
"Function gsc_make_n_crosses_from_top_m_percent is deprecated."
8827 "It behaved unintuitively and goes against genomicSimulation principles on division of functionality\n");
8859 const char* input_file,
8865 fprintf(stderr,
"No crosses exist in that file\n");
8871 if ((fp = fopen(input_file,
"r")) == NULL) {
8872 fprintf(stderr,
"Failed to open file %s.\n", input_file); exit(1);
8881 for (
int filei = 0; filei < t.
num_rows; ++filei) {
8883 fscanf(fp,
"%s %s \n", buffer[0], buffer[1]);
8886 if (combinations[0][bufferi] < 0 || combinations[1][bufferi] < 0) {
8887 fprintf(stderr,
"Parents on file %s line %lu could not be found\n", input_file, (
long unsigned int) filei);
8933 const char* input_file,
8939 fprintf(stderr,
"No crosses exist in that file\n");
8945 if ((fp = fopen(input_file,
"r")) == NULL) {
8946 fprintf(stderr,
"Failed to open file %s.\n", input_file); exit(1);
8953 const char* to_buffer[] = {buffer[0], buffer[1], buffer[2], buffer[3]};
8959 fscanf(fp,
"%s %s %s %s \n", buffer[0], buffer[1], buffer[2], buffer[3]);
8962 fprintf(stderr,
"Could not go ahead with the line %lu cross - g0 names not in records\n",
8963 (
long unsigned int) i);
8972 if (f1_i[0] < 0 || f1_i[1] < 0) {
8976 if (f1_i[0] < 0 || f1_i[1] < 0) {
8979 if (f1_i[0] < 0 || f1_i[1] < 0) {
8980 fprintf(stderr,
"Could not go ahead with the line %lu cross - f1 children do not exist for this quartet\n",
8981 (
long unsigned int) i);
8990 combinations[0][i] = f1_i[0];
8991 combinations[1][i] = f1_i[1];
9024 const _Bool lowIsBest) {
9027 fprintf(stderr,
"Either effect matrix or allele matrix does not exist\n");
9032 if (group_size == 0) {
9033 fprintf(stderr,
"Group %lu does not exist\n", (
long unsigned int) group.
num);
9039 if (group_size <= top_n) {
9050 for (
size_t i = 0; i < fits.
cols; i++) {
9051 p_fits[i] = &(fits.
matrix[0][i]);
9064 top_individuals[i] = group_indexes[p_fits[i] - fits.
matrix[0]];
9098 fprintf(stderr,
"Effect matrix does not exist\n");
9125 if (targets == NULL || effset == NULL) {
9126 fprintf(stderr,
"Either targets or marker effects were not provided\n");
9134 if (n_genotypes >= genotypescap) {
9159 if (i < effset->effects.rows) {
9190 const char allele) {
9199 if (n_genotypes >= genotypescap) {
9234 const char**
const genotypes,
9237 if (genotypes == NULL || counts == NULL ||
9238 counts->
rows < n_genotypes ||
9239 counts->
cols < n_markers) {
9240 fprintf(stderr,
"Inputs for calculating count matrix are improperly sized: calculation cannot proceed\n");
return;
9245 if (genotypes[i] == NULL) {
9251 if (genotypes[i][2*j] == allele) { ++cell_sum; }
9252 if (genotypes[i][2*j + 1] == allele) { ++cell_sum; }
9253 counts->
matrix[i][j] = cell_sum;
9282 const char**
const genotypes,
9287 if (genotypes == NULL || counts == NULL || counts2 == NULL ||
9288 counts->
rows < n_genotypes || counts2->
rows < n_genotypes ||
9289 counts->
cols < n_markers || counts2->
cols < n_markers) {
9290 fprintf(stderr,
"Inputs for calculating count matrix are improperly sized: calculation cannot proceed\n");
return;
9295 if (genotypes[i] == NULL) {
9302 if (genotypes[i][2*j] == allele) { ++cell_sum; }
9303 else if (genotypes[i][2*j] == allele2) { ++cell_sum2;}
9304 if (genotypes[i][2*j + 1] == allele) { ++cell_sum; }
9305 else if (genotypes[i][2*j + 1] == allele2) { ++cell_sum2;}
9306 counts->
matrix[i][j] = cell_sum;
9307 counts2->
matrix[i][j] = cell_sum2;
9347 fprintf(stderr,
"Creating blocks by chromosome length requires at least one recombination map loaded\n");
9353 fprintf(stderr,
"We don't have that recombination maps loaded. Using default map\n");
9359 fprintf(stderr,
"Invalid n value: number of blocks must be positive\n");
9362 if (map.
n_chr < 1) {
9363 fprintf(stderr,
"Map has no chromosomes, so it cannot be divided into blocks\n");
9378 size_t current_block_filling = 0;
9384 case GSC_LINKAGEGROUP_SIMPLE:
9394 while (current_block_filling < n - 1 && chrpos > current_block_filling / n) {
9395 GSC_ID_T b = chr*n + current_block_filling;
9402 ++current_block_filling;
9407 if (bi >= temp_markers_in_blockcap) {
9416 GSC_ID_T b = chr*n + current_block_filling;
9425 case GSC_LINKAGEGROUP_REORDER:
9435 while (current_block_filling < n - 1 && chrpos > current_block_filling / n) {
9436 GSC_ID_T b = chr*n + current_block_filling;
9443 ++current_block_filling;
9448 if (bi >= temp_markers_in_blockcap) {
9457 GSC_ID_T b = chr*n + current_block_filling;
9504 if ((infile = fopen(block_file,
"r")) == NULL) {
9505 fprintf(stderr,
"Failed to open file %s.\n", block_file); exit(1);
9515 fscanf(infile,
"%*[^\n]\n");
9518 while (fscanf(infile,
"%*d %*f %*s %*s ") != EOF) {
9530 memset(markerbuffer, 0,
sizeof(*markerbuffer) * bufferlen);
9531 while ((c = fgetc(infile)) != EOF && c !=
'\n') {
9533 markername[ni] =
'\0';
9539 markerbuffer[mi] = markerindex;
9596 const char* output_file,
9600 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effID.
id);
9606 if ((outfile = fopen(output_file,
"w")) == NULL) {
9607 fprintf(stderr,
"Failed to open file %s.\n", output_file); exit(1);
9625 if (gnames[i] != NULL) {
9626 sprintf(buffer,
"%s_1", gnames[i]);
9628 sprintf(buffer,
"%lu_1", (
long unsigned int) gids[i].
id);
9630 fwrite(buffer,
sizeof(
char), strlen(buffer), outfile);
9646 fprintf(outfile,
" %lf", beffect);
9650 if (gnames[i] != NULL) {
9651 sprintf(buffer,
"\n%s_2", gnames[i]);
9653 sprintf(buffer,
"\n%lu_2", (
long unsigned int) gids[i].
id);
9655 fwrite(buffer,
sizeof(
char), strlen(buffer), outfile);
9670 fprintf(outfile,
" %lf", beffect);
9673 fwrite(
"\n",
sizeof(
char), 1, outfile);
9713 const char* output_file) {
9716 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effID.
id);
9722 if ((outfile = fopen(output_file,
"w")) == NULL) {
9723 fprintf(stderr,
"Failed to open file %s.\n", output_file); exit(1);
9734 for (
GSC_LOCALX_T i = 0; i < m->n_genotypes; ++i, ++total_i) {
9736 if (m->names[i] != NULL) {
9737 sprintf(buffer,
"%s_1", m->names[i]);
9739 sprintf(buffer,
"%lu_1", (
long unsigned int) m->ids[i].id);
9741 fwrite(buffer,
sizeof(
char), strlen(buffer), outfile);
9757 fprintf(outfile,
" %lf", beffect);
9761 if (m->names[i] != NULL) {
9762 sprintf(buffer,
"\n%s_2", m->names[i]);
9764 sprintf(buffer,
"\n%lu_2", (
long unsigned int) m->ids[i].id);
9766 fwrite(buffer,
sizeof(
char), strlen(buffer), outfile);
9781 fprintf(outfile,
" %lf", beffect);
9784 fwrite(
"\n",
sizeof(
char), 1, outfile);
9786 }
while ((m = m->next) != NULL);
9812 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effID.
id);
9828 optimal[i] = best_allele;
9855 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effID.
id);
9863 fprintf(stderr,
"Nonexistent group %lu\n", (
long unsigned int) group.
num);
9873 char best_allele =
'\0';
9878 if (ggenes[i][2*j] != best_allele) {
9883 (best_allele ==
'\0' || e.
effects.
matrix[a][j] > best_score)) {
9885 best_allele = ggenes[i][2*j];
9895 if (ggenes[i][2*j + 1] != best_allele) {
9900 (best_allele ==
'\0' || e.
effects.
matrix[a][j] > best_score)) {
9902 best_allele = ggenes[i][2*j + 1];
9911 optimal[j] = best_allele;
9934 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effID.
id);
9939 double best_gebv = 0;
9951 best_gebv += (2*best_score);
9978 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effID.
id);
9987 fprintf(stderr,
"Nonexistent group %lu\n", (
long unsigned int) group.
num);
9993 double total_score = 0;
10004 if (ggenes[i][2*j] != best_allele) {
10009 (best_allele ==
'\0' || e.
effects.
matrix[a][j] > best_score)) {
10011 best_allele = ggenes[i][2*j];
10021 if (ggenes[i][2*j + 1] != best_allele) {
10026 (best_allele ==
'\0' || e.
effects.
matrix[a][j] > best_score)) {
10028 best_allele = ggenes[i][2*j + 1];
10037 total_score += (2*best_score);
10041 return total_score;
10058 fprintf(stderr,
"Nonexistent effect set with id %lu\n", (
long unsigned int) effID.
id);
10063 double worst_gebv = 0;
10064 double worst_score;
10076 worst_gebv += (2*worst_score);
10106 if ((f = fopen(fname,
"w")) == NULL) {
10107 fprintf(stderr,
"Failed to open file %s for writing output\n", fname);
return;
10140 const _Bool markers_as_rows) {
10142 if ((f = fopen(fname,
"w")) == NULL) {
10143 fprintf(stderr,
"Failed to open file %s for writing output\n", fname);
return;
10179 const _Bool markers_as_rows) {
10181 if ((f = fopen(fname,
"w")) == NULL) {
10182 fprintf(stderr,
"Failed to open file %s for writing output\n", fname);
return;
10189 markers_as_rows, allele);
10219 const _Bool full_pedigree) {
10221 if ((f = fopen(fname,
"w")) == NULL) {
10222 fprintf(stderr,
"Failed to open file %s for writing output\n", fname);
return;
10254 if ((f = fopen(fname,
"w")) == NULL) {
10255 fprintf(stderr,
"Failed to open file %s for writing output\n", fname);
return;
10260 fprintf(stderr,
"Marker effect set %lu does not exist: cannot calculate breeding values\n", (
long unsigned int) effID.
id);
return;
10278 switch (chr.
type) {
10279 case GSC_LINKAGEGROUP_SIMPLE:
10291 case GSC_LINKAGEGROUP_REORDER:
10365 char**
const marker_names,
10371 const char header[] =
"Chrom\tLen\tMarkers\n";
10372 fwrite(header,
sizeof(
char)*strlen(header), 1, f);
10383 for (
GSC_GENOLEN_T chrix = 0; chrix < map->n_chr; ++chrix) {
10385 map->chrs[chrix],&minpos)) {
10391 map->chrs[chrix],&pos)) {
10392 maxpos = (pos > maxpos) ? pos : maxpos;
10393 minpos = (pos < minpos) ? pos : minpos;
10399 len = maxpos - minpos;
10405 if (isonchr >= 0) {
10406 fprintf(f,
"%lu\t%lf\t",(
long unsigned int)isonchr,len*100);
10408 const char colns[] =
"-\t-\t";
10409 fwrite(colns,
sizeof(
char)*strlen(colns), 1, f);
10416 if (k <= n_markers) {
10417 fwrite(marker_names[k],
sizeof(
char)*strlen(marker_names[k]), 1, f);
10419 fprintf(f,
"%lu",(
long unsigned int)k);
10424 fwrite(
"\n",
sizeof(
char), 1, f);
10440 char**
const marker_names,
10441 const _Bool markers_as_rows,
10442 void (*bodycell_printer)(FILE*,
10446 void* bodycell_printer_data) {
10451 fprintf(f,
"%lu",(
long unsigned int) targets->
group.
num);
10455 if (markers_as_rows) {
10458 if (targets != NULL) {
10461 fwrite(
"\t",
sizeof(
char), 1, f);
10465 fwrite(n,
sizeof(
char)*strlen(n), 1, f);
10467 fprintf(f,
"%lu", (
long unsigned int)
gsc_get_id(loc).
id);
10472 fwrite(
"\n",
sizeof(
char), 1, f);
10480 if (ntargets > 0 && ((row < n_markers || (ntargets > 0 && row < targets->cachedAM->n_markers)))) {
10482 if (genos != NULL) {
10489 while (row < n_markers || (ntargets > 0 && row < targets->cachedAM->n_markers)) {
10491 if (row < n_markers) {
10492 if (marker_names[row] != NULL) {
10493 fwrite(marker_names[row],
sizeof(
char)*strlen(marker_names[row]), 1, f);
10500 if (genos != NULL) {
10507 fwrite(
"\t",
sizeof(
char), 1, f);
10508 bodycell_printer(f,loc,row,bodycell_printer_data);
10511 fwrite(
"\n",
sizeof(
char), 1, f);
10514 if (genos != NULL) {
GSC_FREE(genos); }
10518 if (marker_names != NULL) {
10520 fwrite(
"\t",
sizeof(
char), 1, f);
10521 if (marker_names[i] != NULL) {
10522 fwrite(marker_names[i],
sizeof(
char)*strlen(marker_names[i]), 1, f);
10525 fwrite(
"\n",
sizeof(
char), 1, f);
10529 if (targets != NULL) {
10535 fwrite(n,
sizeof(
char)*strlen(n), 1, f);
10537 fprintf(f,
"%lu", (
long unsigned int)
gsc_get_id(loc).
id);
10542 fwrite(
"\t",
sizeof(
char), 1, f);
10543 bodycell_printer(f,loc,i,bodycell_printer_data);
10545 fwrite(
"\n",
sizeof(
char), 1, f);
10574 char allele = *(
char*) data;
10576 if (
get_alleles(loc)[2*markerix] == allele) { ++count; }
10577 if (
get_alleles(loc)[2*markerix + 1] == allele) { ++count; }
10578 char out =
'0' + count;
10579 fwrite(&out,
sizeof(
char), 1, f);
10633 char**
const marker_names,
10634 const _Bool markers_as_rows) {
10690 char**
const marker_names,
10691 const _Bool markers_as_rows,
10692 const char allele) {
10706 void (*strprinter)(
char*,
size_t,
void*),
10707 void (*intprinter)(
long unsigned int,
void*),
10708 void* printer_data) {
10712 strprinter(
"=(",
sizeof(
char)*2,printer_data);
10721 if (p1.
id == p2.
id) {
10725 if (name != NULL) {
10726 strprinter(name,
sizeof(
char)*strlen(name), printer_data);
10728 intprinter((
long unsigned int) p1.
id,printer_data);
10738 if (name != NULL) {
10739 strprinter(name,
sizeof(
char)*strlen(name),printer_data);
10741 intprinter((
long unsigned int) p1.
id,printer_data);
10748 strprinter(
",",
sizeof(
char),printer_data);
10752 if (name != NULL) {
10753 strprinter(name,
sizeof(
char)*strlen(name),printer_data);
10755 intprinter((
long unsigned int) p2.
id,printer_data);
10765 strprinter(
")",
sizeof(
char),printer_data);
10771 FILE* f = (FILE*) data;
10772 fwrite(str, strlen, 1, f);
10778 FILE* f = (FILE*) data;
10779 fprintf(f,
"%lu", i);
10843 const _Bool full_pedigree,
10846 if (targets == NULL) {
return; }
10849 switch (full_pedigree) {
10856 fwrite(n,
sizeof(
char)*strlen(n), 1, f);
10858 fprintf(f,
"%lu", (
long unsigned int)
gsc_get_id(loc).
id);
10862 for (
int parent = 0; parent < 2; ++parent) {
10863 fwrite(
"\t",
sizeof(
char), 1, f);
10870 fwrite(n,
sizeof(
char)*strlen(n), 1, f);
10872 fprintf(f,
"%lu", (
long unsigned int) p.
id);
10876 fwrite(
"\n",
sizeof(
char), 1, f);
10885 fprintf(f,
"%lu\t", (
long unsigned int)
gsc_get_id(loc).
id);
10888 fwrite(n,
sizeof(
char)*strlen(n), 1, f);
10894 && parent_pedigree_store != NULL) {
10900 fwrite(
"\n",
sizeof(
char), 1, f);
10934 if (targets == NULL || eff == NULL) {
return; }
10939 for (
size_t i = 0; i < bvs.
cols; ++i) {
10941 fprintf(f,
"%lu", (
long unsigned int)
gsc_get_id(loc).
id);
10942 fwrite(
"\t",
sizeof(
char), 1, f);
10945 fwrite(n,
sizeof(
char), strlen(n), f);
10947 fwrite(
"\t",
sizeof(
char), 1, f);
10949 fwrite(
"\t\t",
sizeof(
char)*2, 1, f);
10952 fprintf(f,
"%lf", bvs.
matrix[0][i]);
10953 fwrite(
"\n",
sizeof(
char), 1, f);
char * gsc_calculate_optimal_possible_haplotype(const gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID)
Calculates the highest-breeding-value haplotype that can be created from the alleles present in a giv...
double gsc_calculate_optimal_possible_bv(const gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID)
Calculates the breeding value of the highest breeding-value genotype that can be created from the all...
gsc_DecimalMatrix gsc_calculate_allele_counts(const gsc_SimData *d, const gsc_GroupNum group, const char allele)
Calculates the number of times at each marker that a particular allele appears.
gsc_DecimalMatrix gsc_calculate_bvs(const gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID)
Calculate the fitness metric/breeding value for every genotype in the simulation or every genotype in...
void gsc_calculate_utility_allele_counts_pair(const unsigned int n_markers, const unsigned int n_genotypes, const char **const genotypes, const char allele, gsc_DecimalMatrix *counts, const char allele2, gsc_DecimalMatrix *counts2)
Calculates the number of times at each marker that two particular alleles appear.
gsc_MarkerBlocks gsc_create_evenlength_blocks_each_chr(const gsc_SimData *d, const gsc_MapID mapid, const unsigned int n)
Divide the genotype into blocks where each block contains all markers within a 1/n length section of ...
gsc_DecimalMatrix gsc_calculate_utility_bvs(gsc_BidirectionalIterator *targets, const gsc_EffectMatrix *effset)
Calculate the fitness metric/breeding value for a set of genotypes.
gsc_GroupNum gsc_split_by_bv(gsc_SimData *d, const gsc_GroupNum group, const gsc_EffectID effID, const unsigned int top_n, const _Bool lowIsBest)
Takes the top_n individuals in the group with the best breeding values/fitnesses and puts them in a n...
void gsc_calculate_group_local_bvs(const gsc_SimData *d, const gsc_MarkerBlocks b, const gsc_EffectID effID, const char *output_file, const gsc_GroupNum group)
Given a set of blocks of markers in a file, for each genotype in a group, calculate the local fitness...
void gsc_calculate_utility_allele_counts(const unsigned int n_markers, const unsigned int n_genotypes, const char **const genotypes, const char allele, gsc_DecimalMatrix *counts)
Calculates the number of times at each marker that a particular allele appears.
void gsc_calculate_local_bvs(const gsc_SimData *d, const gsc_MarkerBlocks b, const gsc_EffectID effID, const char *output_file)
Given a set of blocks of markers in a file, for each genotype saved, calculate the local BV for the f...
double gsc_calculate_minimal_bv(const gsc_SimData *d, const gsc_EffectID effID)
Takes a look at the currently-loaded effect values and returns the lowest possible breeding value any...
char * gsc_calculate_optimal_haplotype(const gsc_SimData *d, const gsc_EffectID effID)
Takes a look at the currently-loaded effect values and creates a string containing the allele with th...
gsc_MarkerBlocks gsc_load_blocks(const gsc_SimData *d, const char *block_file)
Given a file containing definitions of blocks of markers, process that file and return a struct conta...
double gsc_calculate_optimal_bv(const gsc_SimData *d, const gsc_EffectID effID)
Takes a look at the currently-loaded effect values and returns the highest possible breeding value an...
unsigned int gsc_get_group_genes(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, char **output)
Gets a shallow copy of the genes/alleles of each member of the group.
unsigned int gsc_get_group_parent_ids(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, const int whichParent, gsc_PedigreeID *output)
Gets the ids of either the first or second parent of each member of the group.
unsigned int gsc_get_group_parent_names(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, const int whichParent, char **output)
Gets the names of either the first or second parent of each member of the group.
unsigned int gsc_get_group_pedigrees(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, char **output)
Gets the full pedigree string (as per gsc_save_group_full_pedigree() ) of each member of the group.
unsigned int gsc_get_group_names(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, char **output)
Gets a shallow copy of the names of each member of the group.
unsigned int gsc_get_group_indexes(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, unsigned int *output)
Gets the 0-based global indexes of each member of the group.
unsigned int gsc_get_group_ids(const gsc_SimData *d, const gsc_GroupNum group_id, unsigned int group_size, gsc_PedigreeID *output)
Gets the ids of each member of the group.
unsigned int gsc_get_group_bvs(const gsc_SimData *d, const gsc_GroupNum group_id, const gsc_EffectID effID, unsigned int group_size, double *output)
Gets the breeding values/fitnesses of each member of the group.
size_t gsc_get_existing_group_counts(gsc_SimData *d, gsc_GroupNum *out_groups, unsigned int *out_sizes)
Identify group numbers that currently have members, and how many members they have.
size_t gsc_get_existing_groups(gsc_SimData *d, gsc_GroupNum *output)
Identify group numbers that currently have members.
unsigned int gsc_get_group_size(const gsc_SimData *d, const gsc_GroupNum group_id)
Function to count the number of genotypes that currently belong to the specified group.
gsc_GroupNum gsc_make_double_crosses_from_file(gsc_SimData *d, const char *input_file, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Perform crosses between previously-generated offspring of pairs of parents identified by name in a fi...
gsc_GroupNum gsc_make_random_crosses(gsc_SimData *d, const gsc_GroupNum from_group, const unsigned int n_crosses, const unsigned int cap, const gsc_MapID which_map, const gsc_GenOptions g)
Performs random crosses among members of a group.
gsc_GroupNum gsc_make_crosses_from_file(gsc_SimData *d, const char *input_file, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Perform crosses between pairs of parents identified by name in a file and allocate the resulting offs...
gsc_GroupNum gsc_make_doubled_haploids(gsc_SimData *d, const gsc_GroupNum group, const gsc_MapID which_map, const gsc_GenOptions g)
Creates a doubled haploid from each member of a group.
gsc_GroupNum gsc_make_clones(gsc_SimData *d, const gsc_GroupNum group, const _Bool inherit_names, gsc_GenOptions g)
Creates an identical copy of each member of a group.
gsc_GroupNum gsc_make_random_crosses_between(gsc_SimData *d, const gsc_GroupNum group1, const gsc_GroupNum group2, const unsigned int n_crosses, const unsigned int cap1, const unsigned int cap2, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Performs random crosses where the first parent comes from one group and the second from another.
gsc_GroupNum gsc_scaffold_make_new_genotypes(gsc_SimData *d, const gsc_GenOptions g, void *parentIterator, union gsc_datastore_make_genotypes *datastore, int(*parentChooser)(void *, union gsc_datastore_make_genotypes *, unsigned int *, gsc_ParentChoice[static 2]), void(*offspringGenerator)(gsc_SimData *, union gsc_datastore_make_genotypes *, gsc_ParentChoice[static 2], gsc_GenoLocation))
Make new genotypes (generic function)
gsc_GroupNum gsc_make_all_unidirectional_crosses(gsc_SimData *d, const gsc_GroupNum from_group, const gsc_MapID mapID, const gsc_GenOptions g)
Perform crosses between all pairs of parents in the group from_group and allocates the resulting offs...
gsc_GroupNum gsc_self_n_times(gsc_SimData *d, const unsigned int n, const gsc_GroupNum group, const gsc_MapID which_map, const gsc_GenOptions g)
Selfs each member of a group for a certain number of generations.
gsc_GroupNum gsc_make_targeted_crosses(gsc_SimData *d, const size_t n_combinations, const unsigned int *firstParents, const unsigned int *secondParents, const gsc_MapID map1, const gsc_MapID map2, const gsc_GenOptions g)
Performs the crosses of pairs of parents whose indexes are provided in an array.
void gsc_delete_label(gsc_SimData *d, const gsc_LabelID which_label)
Clears memory of this label from the simulation and all its genotypes.
void gsc_delete_simdata(gsc_SimData *m)
Deletes a gsc_SimData object and frees its memory.
void gsc_delete_eff_set(gsc_SimData *d, gsc_EffectID effID)
Deletes a particular set of marker effects from memory.
void gsc_delete_randomaccess_iter(gsc_RandomAccessIterator *it)
Deletes a gsc_RandomAccessIterator object and frees its memory.
void gsc_delete_effect_matrix(gsc_EffectMatrix *m)
Deletes a gsc_EffectMatrix object and frees its memory.
void gsc_delete_recombination_map(gsc_SimData *d, const gsc_MapID which_map)
Deletes a particular recombination map from memory.
void gsc_delete_bidirectional_iter(gsc_BidirectionalIterator *it)
Deletes a gsc_BidirectionalIterator object.
void gsc_delete_markerblocks(gsc_MarkerBlocks *b)
Delete a gsc_MarkerBlocks struct.
void gsc_delete_dmatrix(gsc_DecimalMatrix *m)
Deletes a gsc_DecimalMatrix and frees its memory.
void gsc_delete_allele_matrix(gsc_AlleleMatrix *m)
Delete the gsc_AlleleMatrix linked list from m onwards and frees its memory.
void gsc_delete_recombination_map_nointegrity(gsc_RecombinationMap *m)
Deletes and clears the memory of a gsc_RecombinationMap struct.
void gsc_delete_group(gsc_SimData *d, const gsc_GroupNum group_id)
Deletes all genotypes belonging to a particular group.
void gsc_move_genotype(gsc_GenoLocation from, gsc_GenoLocation to, int *label_defaults)
Move all details of the genotype at one gsc_GenoLocation to another gsc_GenoLocation.
void gsc_delete_genome(gsc_KnownGenome *g)
Deletes and clears the memory of a gsc_KnownGenome object and its children.
size_t gsc_split_into_buckets(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, const unsigned int *counts, gsc_GroupNum *results)
Split a group into n groups of equal size (or size differing only by one, if n does not perfectly div...
size_t gsc_split_by_probabilities(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, const double *probs, gsc_GroupNum *results)
Allocate each member of the group to one of n groups with custom probabilities for each group.
size_t gsc_scaffold_split_by_somequality(gsc_SimData *d, const gsc_GroupNum group_id, void *somequality_data, gsc_GroupNum(*somequality_tester)(gsc_GenoLocation, void *, size_t, size_t, gsc_GroupNum *), size_t maxentries_results, gsc_GroupNum *results)
Split by some quality (generic function)
gsc_GroupNum gsc_split_evenly_into_two(gsc_SimData *d, const gsc_GroupNum group_id)
Split a group into two groups of equal size (or size differing only by one, if the original group had...
gsc_GroupNum gsc_split_by_label_range(gsc_SimData *d, const gsc_GroupNum group, const gsc_LabelID whichLabel, const int valueLowBound, const int valueHighBound)
Allocates the genotypes with values of a label in a particular range to a new group.
size_t gsc_split_into_individuals(gsc_SimData *d, const gsc_GroupNum group_id, size_t maxentries_results, gsc_GroupNum *results)
Split a group into n one-member groups.
size_t gsc_split_into_halfsib_families(gsc_SimData *d, const gsc_GroupNum group_id, const int parent, size_t maxentries_results, gsc_GroupNum *results)
Split a group into families of half-siblings by shared first or second parent.
size_t gsc_split_evenly_into_n(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, gsc_GroupNum *results)
Split a group into n groups of equal size (or size differing only by one, if n does not perfectly div...
size_t gsc_scaffold_split_by_someallocation(gsc_SimData *d, const gsc_GroupNum group_id, void *someallocator_data, gsc_GroupNum(*someallocator)(gsc_GenoLocation, gsc_SimData *, void *, size_t, size_t *, gsc_GroupNum *), size_t n_outgroups, gsc_GroupNum *outgroups)
Split by some allocator (generic function)
gsc_GroupNum gsc_combine_groups(gsc_SimData *d, const size_t list_len, const gsc_GroupNum *grouplist)
Combine a set of groups into one group.
gsc_GroupNum gsc_split_by_label_value(gsc_SimData *d, const gsc_GroupNum group, const gsc_LabelID whichLabel, const int valueToSplit)
Allocates the genotypes with a particular value of a label to a new group.
gsc_GroupNum gsc_split_randomly_into_two(gsc_SimData *d, const gsc_GroupNum group_id)
Flip a coin for each member of the group to decide if it should be moved to the new group.
gsc_GroupNum gsc_make_group_from(gsc_SimData *d, const size_t index_list_len, const unsigned int *genotype_indexes)
Take a list of indexes and allocate the genotypes at those indexes to a new group.
size_t gsc_split_randomly_into_n(gsc_SimData *d, const gsc_GroupNum group_id, const size_t n, gsc_GroupNum *results)
Allocate each member of the group to one of n groups with equal probability.
size_t gsc_split_into_families(gsc_SimData *d, const gsc_GroupNum group_id, size_t maxentries_results, gsc_GroupNum *results)
Split a group into families by their pedigrees.
gsc_BidirectionalIterator gsc_create_bidirectional_iter(gsc_SimData *d, const gsc_GroupNum group)
Create a bidirectional iterator.
gsc_AlleleMatrix * gsc_get_nth_AlleleMatrix(gsc_AlleleMatrix *listStart, const unsigned int n)
Get a gsc_AlleleMatrix by index in the linked list.
#define GSC_INVALID_GENO_LOCATION
Constant representing a nonexistent location in the simulation.
gsc_BidirectionalIterator gsc_create_bidirectional_iter_fromAM(gsc_AlleleMatrix *am, const gsc_GroupNum group)
gsc_GenoLocation gsc_next_forwards(gsc_BidirectionalIterator *it)
Get the next location from a bidirectional iterator.
gsc_GenoLocation gsc_set_bidirectional_iter_to_end(gsc_BidirectionalIterator *it)
Initialise a Bidirectional iterator to the end of its sequence.
gsc_RandomAccessIterator gsc_create_randomaccess_iter(gsc_SimData *d, const gsc_GroupNum group)
Create a Random Access Iterator.
gsc_GenoLocation gsc_set_bidirectional_iter_to_start(gsc_BidirectionalIterator *it)
Initialise a Bidirectional iterator to the start of its sequence.
gsc_GenoLocation gsc_next_backwards(gsc_BidirectionalIterator *it)
Get the previous location from a bidirectional iterator.
gsc_GenoLocation gsc_next_get_nth(gsc_RandomAccessIterator *it, const unsigned int n)
Get a location by index using a gsc_RandomAccessIterator.
#define GSC_IS_VALID_LOCATION(g)
Check if a GenoLocation is INVALID_GENO_LOCATION.
static gsc_PedigreeID gsc_get_id(const gsc_GenoLocation loc)
Get the persistent id of a genotype.
static char * gsc_get_name(const gsc_GenoLocation loc)
Get the name of a genotype.
static int gsc_get_label_value(const gsc_GenoLocation loc, const int labelIndex)
Get the value of a specific label of a genotype.
static char * gsc_get_alleles(const gsc_GenoLocation loc)
Get the alleles of a genotype.
static void gsc_set_group(const gsc_GenoLocation loc, const gsc_GroupNum group)
Set the current group membership of a genotype.
static gsc_PedigreeID gsc_get_first_parent(const gsc_GenoLocation loc)
Get the first/left parent of a genotype.
static gsc_PedigreeID gsc_get_second_parent(const gsc_GenoLocation loc)
Get the second/right parent of a genotype.
static void gsc_set_name(const gsc_GenoLocation loc, char *name)
Set the name of a genotype.
static gsc_GroupNum gsc_get_group(const gsc_GenoLocation loc)
Get the current group membership of a genotype.
gsc_AlleleMatrix * gsc_create_empty_allelematrix(const unsigned int n_markers, const unsigned int n_labels, const int *labelDefaults, const unsigned int n_genotypes)
Creator for an empty gsc_AlleleMatrix object of a given size.
gsc_EffectID gsc_load_effectfile(gsc_SimData *d, const char *filename)
Populates a gsc_SimData combination with effect values.
struct gsc_MultiIDSet gsc_load_data_files(gsc_SimData *d, const char *genotype_file, const char *map_file, const char *effect_file, const gsc_FileFormatSpec format)
Populates a gsc_SimData object with marker allele data, a genetic map, and (optionally) marker effect...
gsc_GroupNum gsc_load_genotypefile(gsc_SimData *d, const char *filename, const gsc_FileFormatSpec format)
Load a set of genotypes to a gsc_SimData object.
gsc_MapID gsc_create_uniformspaced_recombmap(gsc_SimData *d, unsigned int n_markers, char **markernames, double expected_n_recombinations)
Create a uniformly-spaced gsc_RecombinationMap from a list of marker names and save to SimData.
void gsc_clear_simdata(gsc_SimData *d)
Clear a gsc_SimData object on the heap.
gsc_SimData * gsc_create_empty_simdata(unsigned int RNGseed)
Creator for an empty gsc_SimData object on the heap.
gsc_MapID gsc_load_mapfile(gsc_SimData *d, const char *filename)
Load a genetic map to a gsc_SimData object.
gsc_MapID gsc_create_recombmap_from_markerlist(gsc_SimData *d, unsigned int n_markers, struct gsc_MapfileUnit *markerlist)
Parse a list of markers/chrs/positions into a gsc_RecombinationMap and save to SimData.
int gsc_add_matrixvector_product_to_dmatrix(gsc_DecimalMatrix *result, const gsc_DecimalMatrix *a, const double *b)
Multiply a gsc_DecimalMatrix to a vector, and add that product to the first column of a provided gsc_...
int gsc_randpoi(rnd_pcg_t *rng, double lambda)
Randomly generates a number from the Poisson distribution with parameter lambda, using the Knuth appr...
gsc_DecimalMatrix gsc_generate_zero_dmatrix(const size_t r, const size_t c)
Generates a matrix of c columns, r rows with all 0.
int gsc_add_doublematrixvector_product_to_dmatrix(gsc_DecimalMatrix *result, const gsc_DecimalMatrix *amat, const double *avec, const gsc_DecimalMatrix *bmat, const double *bvec)
Multiply two sets of a gsc_DecimalMatrix and vector, and add both products to the first column of a p...
void gsc_generate_clone(gsc_SimData *d, const char *parent_genome, char *output)
Get an identical copy of a given genotype.
void gsc_generate_doubled_haploid(gsc_SimData *d, const char *parent_genome, char *output, const unsigned int map_index)
Get the alleles of the outcome of producing a doubled haploid from a gamete from a given parent.
void gsc_generate_gamete(gsc_SimData *d, const char *parent_genome, char *output, const unsigned int map_index)
Fills a char* with the simulated result of meiosis (reduction and recombination) from the marker alle...
static int gsc_has_same_alleles_window(const char *g1, const char *g2, const size_t start, const size_t w)
Simple operator to determine if at markers with indexes i to i+w inclusive, two genotypes share at le...
int gsc_calculate_recombinations_from_file(gsc_SimData *d, const char *input_file, const char *output_file, int window_len, int certain)
Provides guesses as to the location of recombination events that led to the creation of certain genot...
int * gsc_calculate_min_recombinations_fw1(gsc_SimData *d, gsc_MapID mapid, char *parent1, unsigned int p1num, char *parent2, unsigned int p2num, char *offspring, int certain)
Identify markers in the genotype of offspring where recombination from its parents occurred.
static int gsc_has_same_alleles(const char *p1, const char *p2, const size_t i)
Simple operator to determine if at marker i, two genotypes share at least one allele.
int * gsc_calculate_min_recombinations_fwn(gsc_SimData *d, gsc_MapID mapid, char *parent1, unsigned int p1num, char *parent2, unsigned int p2num, char *offspring, int window_size, int certain)
Identify markers in the genotype of offspring where recombination from its parents occurred,...
void gsc_save_utility_markerblocks(FILE *f, const gsc_MarkerBlocks b, const unsigned int n_markers, char **const marker_names, const gsc_RecombinationMap *map)
Prints the markers contained in a set of blocks to a file.
void gsc_save_utility_pedigrees(FILE *f, gsc_BidirectionalIterator *targets, const _Bool full_pedigree, const gsc_AlleleMatrix *parent_pedigree_store)
Prints pedigrees to a file.
void gsc_save_markerblocks(const char *fname, const gsc_SimData *d, const gsc_MarkerBlocks b, const gsc_MapID labelMapID)
Prints the markers contained in a set of blocks to a file.
void gsc_save_utility_allele_counts(FILE *f, gsc_BidirectionalIterator *targets, unsigned int n_markers, char **const marker_names, const _Bool markers_as_rows, const char allele)
Prints allele counts of simulated genotypes to a file.
void gsc_save_bvs(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const gsc_EffectID effID)
Prints breeding values of genotypes in the simulation to a file.
void gsc_save_utility_bvs(FILE *f, gsc_BidirectionalIterator *targets, const gsc_EffectMatrix *eff)
Calculate and print breeding values to a file.
void gsc_save_allele_counts(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const char allele, const _Bool markers_as_rows)
Prints allele counts of genotypes from the simulation to a file.
void gsc_save_genotypes(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const _Bool markers_as_rows)
Prints genotypes from the simulation to a file.
void gsc_save_utility_genotypes(FILE *f, gsc_BidirectionalIterator *targets, unsigned int n_markers, char **const marker_names, const _Bool markers_as_rows)
Prints simulated genotypes to a file.
void gsc_save_pedigrees(const char *fname, const gsc_SimData *d, const gsc_GroupNum groupID, const _Bool full_pedigree)
Prints pedigrees of genotypes in the simulation to a file.
char * gsc_get_genes_of_index(const gsc_AlleleMatrix *start, const unsigned int index)
Get the alleles of a genotype by its index.
char * gsc_get_name_of_id(const gsc_AlleleMatrix *start, const gsc_PedigreeID id)
Returns the name of the genotype with a given id.
void gsc_get_ids_of_names(const gsc_AlleleMatrix *start, const size_t n_names, const char **names, gsc_PedigreeID *output)
Search for genotypes with certain names in a linked list of gsc_AlleleMatrix and save the ids of thos...
unsigned int gsc_get_index_of_child(const gsc_AlleleMatrix *start, const gsc_PedigreeID parent1id, const gsc_PedigreeID parent2id)
Search for a genotype with parentage matching two given parent ids in a linked list of gsc_AlleleMatr...
unsigned int gsc_get_index_of_name(const gsc_AlleleMatrix *start, const char *name)
Search for a genotype with a particular name in a linked list of gsc_AlleleMatrix,...
gsc_PedigreeID gsc_get_id_of_index(const gsc_AlleleMatrix *start, const unsigned int index)
Get the id of a genotype by its index.
int gsc_get_parents_of_id(const gsc_AlleleMatrix *start, const gsc_PedigreeID id, gsc_PedigreeID output[static 2])
Saves the ids of the parents of a genotype with a particular id to the output array output.
#define delete_bidirectional_iter
#define IS_VALID_LOCATION
#define INVALID_GENO_LOCATION
#define RandomAccessIterator
#define GSC_FINALISE_BUFFER(n, as, nentries)
Macro to convert a stretchy buffer to a solid heap vector.
gsc_GenotypeFileCellStyle
Represent possible representations of alleles at a marker in a genotype file.
gsc_TableFileReader gsc_tablefilereader_create(const char *filename)
Open a file for reading with gsc_TableFileReader.
#define GSC_NO_LABEL
Empty/null value for custom label identifiers.
#define GSC_STRETCH_BUFFER(n, newlen)
Macro to expand the capacity of a stretchy buffer.
void gsc_tablefilecell_deep_copy(gsc_TableFileCell *c)
Allocate memory to store a deep copy of a gsc_TableFileCell, if previously only a shallow copy.
void gsc_tablefilereader_close(gsc_TableFileReader *tbl)
Close a gsc_TableFileReader's file pointer.
#define GSC_DELETE_BUFFER(n)
Macro to delete a stretchy buffer.
#define GSC_NO_EFFECTSET
Empty/null value for effect set identifiers.
enum gsc_TableFileCurrentStatus gsc_helper_tablefilereader_classify_char(gsc_TableFileReader *tbl)
Classify the character under the cursor of a TableFileReader as cell contents or otherwise.
gsc_TableFileCell gsc_tablefilereader_get_next_cell(gsc_TableFileReader *tbl)
Read forwards in TableFileReader and return the next cell's contents, as well as how many column gaps...
gsc_TableFileCurrentStatus
Represent possible states of the cursor of a gsc_TableFileReader.
#define GSC_CREATE_BUFFER(n, type, length)
Macro to create a stretchy buffer of any type and some length.
const gsc_GenOptions GSC_BASIC_OPT
Default parameter values for GenOptions, to help with quick scripts and prototypes.
gsc_FileFormatSpec gsc_define_matrix_format_details(const GSC_LOGICVAL has_header, const GSC_LOGICVAL markers_as_rows, const enum gsc_GenotypeFileCellStyle cell_style)
Give genomicSimulation hints on the format of a genotype matrix file to be loaded.
void gsc_helper_tablefilereader_refill_buffer(gsc_TableFileReader *tbl)
Read another buffer's worth of characters from a gsc_TableFileReader's file.
#define GSC_NO_PEDIGREE
Empty/null value for pedigree fields.
gsc_GenotypeFileType
Enumerate types of genotype files that the simulation knows how to load.
#define GSC_NO_GROUP
Empty/null value for group allocations.
@ GSC_GENOTYPECELLSTYLE_SLASHPAIR
@ GSC_GENOTYPECELLSTYLE_PAIR
@ GSC_GENOTYPECELLSTYLE_UNKNOWN
@ GSC_GENOTYPECELLSTYLE_ENCODED
@ GSC_GENOTYPECELLSTYLE_COUNT
@ GSC_TABLEFILE_ERROR_EOF
@ GSC_TABLEFILE_ERROR_EOBUF
@ GSC_TABLEFILE_COLUMNGAP
@ GSC_GENOTYPEFILE_MATRIX
Either a marker-by-line matrix, where each marker is a row, or a line-by-marker matrix,...
@ GSC_GENOTYPEFILE_UNKNOWN
gsc_GroupNum gsc_get_next_free_group_num(const size_t n_existing_groups, const gsc_GroupNum *existing_groups, size_t *cursor, gsc_GroupNum previous)
Iterator to get the next currently-free group number.
unsigned int gsc_get_from_ordered_pedigree_list(const gsc_PedigreeID target, const unsigned int listLen, const gsc_PedigreeID *list)
Binary search through list of unsigned integers.
gsc_GroupNum gsc_get_new_group_num(gsc_SimData *d)
Function to identify the next sequential integer that does not identify a group that currently has me...
void gsc_change_label_to(gsc_SimData *d, const gsc_GroupNum whichGroup, const gsc_LabelID whichLabel, const int setTo)
Set the values of a custom label.
struct gsc_TableSize gsc_get_file_dimensions(const char *filename, const char sep)
Opens a table file and reads the number of columns and rows (including headers) separated by sep into...
int gsc_get_integer_digits(const int i)
Count and return the number of digits in i.
unsigned int gsc_get_index_of_map(const gsc_SimData *d, const gsc_MapID map)
Function to identify the lookup index of a recombination map identifier.
unsigned int gsc_get_index_of_label(const gsc_SimData *d, const gsc_LabelID label)
Function to identify the label lookup index of a label identifier.
gsc_MapID gsc_get_new_map_id(const gsc_SimData *d)
Function to identify the next sequential integer that is not already allocated to a map ID in the sim...
void gsc_shuffle_up_to(rnd_pcg_t *rng, void *sequence, const size_t item_size, const size_t total_n, const size_t n_to_shuffle)
Produce a random ordering of the first n elements in an array using a (partial) Fisher-Yates shuffle.
void gsc_change_label_default(gsc_SimData *d, const gsc_LabelID whichLabel, const int newDefault)
Set the default value of a custom label.
size_t gsc_get_from_ordered_str_list(const char *target, const size_t listLen, const char **list)
Binary search through a list of strings.
size_t gsc_get_from_unordered_str_list(const char *target, const size_t listLen, const char **list)
Linear search through a list of strings.
gsc_EffectID gsc_get_new_eff_set_id(const gsc_SimData *d)
Function to identify the next sequential integer that is not already allocated to a marker effect set...
void gsc_get_n_new_group_nums(gsc_SimData *d, const size_t n, gsc_GroupNum *result)
Function to identify the next n sequential integers that do not identify a group that currently has m...
void gsc_change_label_to_values(gsc_SimData *d, const gsc_GroupNum whichGroup, const unsigned int startIndex, const gsc_LabelID whichLabel, const size_t n_values, const int *values)
Copy a vector of integers into a custom label.
gsc_LabelID gsc_get_new_label_id(const gsc_SimData *d)
Function to identify the next sequential integer that is not already allocated to a label in the simu...
unsigned int gsc_get_index_of_eff_set(const gsc_SimData *d, const gsc_EffectID eff_set_id)
Function to identify the lookup index of a marker effect set identifier.
gsc_LabelID gsc_create_new_label(gsc_SimData *d, const int setTo)
Initialises a new custom label.
void gsc_change_names_to_values(gsc_SimData *d, const gsc_GroupNum whichGroup, const unsigned int startIndex, const size_t n_values, const char **values)
Copy a vector of strings into the genotype name field.
_Bool gsc_get_index_of_genetic_marker(const char *target, gsc_KnownGenome g, unsigned int *out)
Return whether or not a marker name is present in the tracked markers, and at what index.
void gsc_change_allele_symbol(gsc_SimData *d, const char *which_marker, const char from, const char to)
Replace all occurrences of a given allele with a different symbol representation.
void gsc_change_label_by_amount(gsc_SimData *d, const gsc_GroupNum whichGroup, const gsc_LabelID whichLabel, const int byValue)
Increment the values of a custom label.
void gsc_condense_allele_matrix(gsc_SimData *d)
A function to tidy the internal storage of genotypes after addition or deletion of genotypes in the g...
unsigned int gsc_randomdraw_replacementrules(gsc_SimData *d, unsigned int max, unsigned int cap, unsigned int *member_uses, unsigned int noCollision)
Randomly pick a number in a range, optionally with a cap on how many times a number can be picked,...
static gsc_GroupNum gsc_helper_split_by_allocator_knowncounts(gsc_GenoLocation loc, gsc_SimData *d, void *datastore, size_t n_outgroups, size_t *subgroupsfound, gsc_GroupNum *outgroups)
static gsc_GroupNum gsc_helper_split_by_quality_halfsib2(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
static int gsc_helper_parentchooser_cross_randomly(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_random_crosses.
static void gsc_helper_genoptions_save_genotypes(FILE *fg, gsc_AlleleMatrix *tosave)
save-as-you-go (genotypes/alleles)
static GSC_LOGICVAL gsc_helper_parse_3cell_header(gsc_TableFileReader *tf, const char **canonical_titles, int *col_order, gsc_TableFileCell *unprocessedqueue, size_t *queuesize)
Header row reading and processing for map and effect set files.
static struct gsc_EmptyListNavigator gsc_create_emptylistnavigator(gsc_SimData *d, gsc_GroupNum allocation_group)
Create a new gsc_EmptyListNavigator, including an empty AlleleMatrix suitable for inserting into the ...
static FILE * gsc_helper_genoptions_save_pedigrees_setup(const gsc_GenOptions g)
Opens file for writing save-as-you-go pedigrees in accordance with gsc_GenOptions.
static void gsc_helper_output_genotypematrix_cell(FILE *f, gsc_GenoLocation loc, unsigned int markerix, void *GSC_NA)
Kernel for gsc_scaffold_save_genotype_info, when the goal is to save the (phased) allele pairs of eac...
static void gsc_helper_make_offspring_doubled_haploids(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for gsc_make_doubled_haploids.
static void gsc_helper_genoptions_save_bvs(FILE *fe, gsc_EffectMatrix *effMatrices, unsigned int effIndex, gsc_AlleleMatrix *tosave)
save-as-you-go (breeding values)
static int gsc_helper_parentchooser_cross_targeted(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_targeted_crosses.
static int gsc_helper_parentchooser_cross_randomly_between(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_random_crosses_between.
static FILE * gsc_helper_genoptions_save_genotypes_setup(const gsc_SimData *d, const gsc_GenOptions g)
Opens file for writing save-as-you-go genotypes in accordance with gsc_GenOptions.
static gsc_MapID gsc_helper_insert_recombmap_into_simdata(gsc_SimData *d, gsc_RecombinationMap map)
Save a RecombinationMap to the SimData and allocate it a mapID.
static void gsc_set_names(gsc_AlleleMatrix *a, const char *prefix, const int suffix, const unsigned int from_index)
Fills the designated section of the .names array in a gsc_AlleleMatrix with the pattern "`prefix`ind...
static gsc_EffectID gsc_helper_insert_eff_set_into_simdata(gsc_SimData *d, gsc_EffectMatrix effset)
Save an EffectMatrix to the SimData and allocate it an EffectID.
static struct gsc_GenotypeFile_MatrixFormat gsc_helper_genotypefile_matrix_detect_orientation(const gsc_SimData *d, const gsc_TableFileCell *cellqueue, const size_t firstrowlen, const size_t queuelen, struct gsc_GenotypeFile_MatrixFormat format, const char *filenameforlog)
Determine whether a genotype matrix is row- or column-oriented.
static void gsc_scaffold_save_genotype_info(FILE *f, gsc_BidirectionalIterator *targets, unsigned int n_markers, char **const marker_names, const _Bool markers_as_rows, void(*bodycell_printer)(FILE *, gsc_GenoLocation, unsigned int, void *), void *bodycell_printer_data)
Prints a matrix of genotype information to a file.
static int gsc_helper_ascending_pdouble_comparer(const void *pp0, const void *pp1)
Comparator function for qsort.
static struct gsc_GenotypeFile_MatrixFormat gsc_helper_genotypefile_matrix_detect_header(const gsc_TableFileCell *cellqueue, const size_t firstrowlen, const size_t queuelen, struct gsc_GenotypeFile_MatrixFormat format, const char *filenameforlog)
Determine whether a genotype matrix has a header row or not.
static void gsc_helper_make_offspring_self_n_times(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for gsc_self_n_times.
static int gsc_helper_descending_pdouble_comparer(const void *pp0, const void *pp1)
Comparator function for qsort.
static int gsc_helper_ascending_double_comparer(const void *pp0, const void *pp1)
Comparator function for qsort.
static void gsc_helper_ancestry_intprinter_file(long unsigned int i, void *data)
Kernel for scaffold functions that require printing an integer to a file (as opposed to saving the in...
static gsc_GenoLocation gsc_emptylistnavigator_get_first(struct gsc_EmptyListNavigator *it)
Reset the cursor of a gsc_EmptyListNavigator to the first genotype.
static void gsc_emptylistnavigator_finaliselist(struct gsc_EmptyListNavigator *it)
Push emptylist edited genotypes into the SimData.
static gsc_GenoLocation gsc_emptylistnavigator_get_next(struct gsc_EmptyListNavigator *it)
Get the next sequential genotype in a gsc_EmptyListNavigator.
static int gsc_helper_mapfileunit_ascending_d_comparer(const void *p0, const void *p1)
Comparator function for qsort.
static gsc_GroupNum gsc_helper_split_by_quality_individuate(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
static gsc_GenoLocation gsc_nextgappy_valid_pos(struct gsc_GappyIterator *it)
Sets the current cursor position in a gsc_GappyIterator to the next valid position,...
static gsc_GroupNum gsc_helper_split_by_quality_halfsib1(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
static GSC_LOGICVAL gsc_helper_is_marker_in_chr(const unsigned int markerix, const gsc_LinkageGroup chr, double *pos)
Check if a marker index is found in a particular LinkageGroup, and provide its distance along the chr...
static void gsc_helper_genotypecell_to_allelematrix(gsc_GenoLocation loc, unsigned int markerix, enum gsc_GenotypeFileCellStyle style, char *cell, gsc_SimData *forrng)
Parse a string and save it as the alleles of a genotype at a particular location and genetic marker.
static void gsc_scaffold_save_ancestry_of(const gsc_AlleleMatrix *m, gsc_PedigreeID p1, gsc_PedigreeID p2, void(*strprinter)(char *, size_t, void *), void(*intprinter)(long unsigned int, void *), void *printer_data)
Identifies and saves (recursively) the pedigree of a pair of parents.
static gsc_TableFileCell gsc_helper_tablefilereader_get_next_cell_wqueue(gsc_TableFileReader *tf, gsc_TableFileCell **queue, size_t *queuesize)
Return the next cell from a queue of cells until the queue is exhausted, and thereafter read new cell...
static int gsc_helper_parentchooser_cloning(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_make_clones.
static gsc_GroupNum gsc_helper_split_by_quality_halfsibtemplate(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results, gsc_PedigreeID(*getparent)(gsc_GenoLocation))
static gsc_GroupNum gsc_helper_split_by_allocator_equalprob(gsc_GenoLocation loc, gsc_SimData *d, void *datastore, size_t n_outgroups, size_t *subgroupsfound, gsc_GroupNum *outgroups)
static void gsc_helper_make_offspring_cross(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for all crossing functions
static void gsc_helper_genoptions_save_pedigrees(FILE *fp, gsc_SimData *d, gsc_AlleleMatrix *tosave)
save-as-you-go (pedigrees)
static int gsc_helper_mapfileunit_ascending_chr_comparer(const void *p0, const void *p1)
Comparator function for qsort.
static enum gsc_GenotypeFileCellStyle gsc_helper_genotype_matrix_identify_cell_style(gsc_TableFileCell c)
Identify what formatting a genotype matrix is representing alleles as.
static void * gsc_malloc_wrap(const size_t size, char exitonfail)
Replace direct calls to malloc with this function.
static int gsc_helper_parentchooser_selfing(void *parentIterator, union gsc_datastore_make_genotypes *datastore, unsigned int *counter, gsc_ParentChoice parents[static 2])
parentChooser function parameter for gsc_self_n_times.
static void gsc_helper_make_offspring_clones(gsc_SimData *d, union gsc_datastore_make_genotypes *datastore, gsc_ParentChoice parents[static 2], gsc_GenoLocation putHere)
offspringGenerator function parameter for gsc_make_clones.
gsc_GroupNum gsc_make_n_crosses_from_top_m_percent(gsc_SimData *d, const int n, const int m, const gsc_GroupNum group, const gsc_MapID mapID, const gsc_EffectID effID, const gsc_GenOptions g)
static void gsc_helper_genoptions_give_names_and_ids(gsc_AlleleMatrix *am, gsc_SimData *d, const gsc_GenOptions g)
Apply gsc_GenOptions naming scheme and gsc_PedigreeID allocation to a single gsc_AlleleMatrix.
static FILE * gsc_helper_genoptions_save_bvs_setup(const gsc_SimData *d, const gsc_GenOptions g, unsigned int *effIndexp)
Opens file for writing save-as-you-go breeding values in accordance with gsc_GenOptions.
static void gsc_helper_sort_markerlist(unsigned int n_markers, struct gsc_MapfileUnit *markerlist)
Sort markerlist by chromosome name, and by position within each chromosome.
static struct gsc_GenotypeFile_MatrixFormat gsc_helper_genotypefile_matrix_detect_cellstyle(const gsc_TableFileCell *cellqueue, const size_t firstrowlen, const size_t queuelen, struct gsc_GenotypeFile_MatrixFormat format, const char *filenameforlog)
Determine the style in which alleles are stored in a genotype matrix.
static void gsc_helper_output_countmatrix_cell(FILE *f, gsc_GenoLocation loc, unsigned int markerix, void *data)
Kernel for gsc_scaffold_save_genotype_info, when the goal is to save the allele counts of a particula...
static gsc_GroupNum gsc_load_genotypefile_matrix(gsc_SimData *d, const char *filename, const gsc_FileFormatSpec format)
Loads a genotype file, with or without existing genome model in the SimData.
static int gsc_helper_indirect_alphabetical_str_comparer(const void *p0, const void *p1)
Comparator function for qsort.
static unsigned int gsc_helper_str_markerlist_leftjoin(gsc_KnownGenome g, unsigned int n_markers_in_list, struct gsc_MapfileUnit **markerlist)
Discard markers whose names are not present in a gsc_KnownGenome.
static size_t gsc_helper_parse_mapfile(const char *filename, struct gsc_MapfileUnit **out)
Extract the contents of a genetic map file.
static gsc_GenoLocation gsc_nextgappy_get_gap(struct gsc_GappyIterator *it)
Sets the current cursor position in a gsc_GappyIterator to the next empty position,...
static GSC_LOGICVAL gsc_helper_genotypefile_matrix_detect_cornercell_presence(const size_t ncellsfirstrow, const size_t ncellssecondrow, const _Bool secondrowheaderisempty)
Determine whether a genotype matrix has a corner cell or not.
static unsigned int gsc_helper_random_cross_checks(gsc_SimData *d, const gsc_GroupNum from_group, const unsigned int n_crosses, const unsigned int cap)
Check input parameters of random crossing functions.
static gsc_GenoLocation gsc_nextgappy_get_nongap(struct gsc_GappyIterator *it)
Sets the current cursor position in a gsc_GappyIterator to the next filled position,...
static void gsc_helper_ancestry_strprinter_file(char *str, size_t strlen, void *data)
Kernel for scaffold functions that require printing a string to a file (as opposed to saving the stri...
static gsc_GroupNum gsc_helper_split_by_allocator_unequalprob(gsc_GenoLocation loc, gsc_SimData *d, void *datastore, size_t n_outgroups, size_t *subgroupsfound, gsc_GroupNum *outgroups)
static gsc_GroupNum gsc_helper_split_by_quality_family(gsc_GenoLocation loc, void *datastore, size_t maxgroups, size_t groupsfound, gsc_GroupNum *results)
#define GSC_ID_T
genomicSimulation's "ID" type
#define GSC_NA_IDX
When accessing the current array index of a unique session ID, the "ID not found"/failure value is -1...
#define GSC_GLOBALX_T
genomicSimulation's "Candidate global index" type
#define GSC_GENOLEN_T
genomicSimulation's "Genotype length" type
GSC_LOGICVAL
genomicSimulation's "logical value" type
#define GSC_NA_LOCALX
For candidate local indexes, the INVALID/UNINITIALISED value is -1 (for signed types) or the maximum ...
#define GSC_LOCALX_T
genomicSimulation's "Candidate local index" type
#define GSC_NA_ID
For unique session IDs, the INVALID/UNINITIALISED value is 0.
#define GSC_NA_GLOBALX
For candidate global indexes, the INVALID/UNINITIALISED value is -1 (for signed types) or the maximum...
char * names[1000]
Array of dynamically allocated strings containing the names of the lines/genotypes in this matrix.
unsigned int n_genotypes
Number of genotypes currently loaded in this matrix.
unsigned int n_markers
Number of markers across which genotypes are tracked.
char * alleles[1000]
A matrix of SNP markers by lines/genotypes containing pairs of alleles eg TT, TA.
unsigned int n_labels
Number of custom labels currently available to this gsc_AlleleMatrix.
gsc_AlleleMatrix * next
Pointer to the next gsc_AlleleMatrix in the linked list, or NULL if this entry is the last.
int ** labels
Pointer to list of labels.
gsc_PedigreeID ids[1000]
Unique ID for each genotype.
gsc_GroupNum groups[1000]
Group allocation of each genotype.
gsc_PedigreeID pedigrees[2][1000]
Two lists of integer IDs of the parents of this genotype (if tracked), or 0 if we don't know/care.
A structure to iterate forwards and backwards through all genotypes in a gsc_SimData or through only ...
_Bool atEnd
Boolean that is TRUE if the iterator's 'cursor' is on the last genotype (genotype with the highest in...
unsigned int cachedAMIndex
Index of cachedAM in the linked list of gsc_AlleleMatrix beginning at d->m.
const gsc_GroupNum group
Group through which to iterate.
gsc_AlleleMatrix * cachedAM
Pointer to the gsc_AlleleMatrix from the linked list of gsc_AlleleMatrix beginning at d->m where the ...
_Bool atStart
Boolean that is TRUE if the iterator's 'cursor' is on the first genotype (genotype with the lowest in...
unsigned int localPos
Local index (index within the cachedAM) of the genotype in the linked list of gsc_AlleleMatrix beginn...
gsc_AlleleMatrix * am
Simulation genotypes through which to iterate.
A row-major heap matrix that contains floating point numbers.
double ** matrix
The actual matrix and contents.
size_t cols
Number of columns in the matrix.
size_t rows
Number of rows in the matrix.
A type representing a particular loaded set of marker effects.
A type that stores a matrix of effect values and their names.
char * effect_names
Character array containing allele characters ordered to match rows of effects.
gsc_DecimalMatrix effects
Effect on breeding value of alleles at markers.
A structure to hold an initially empty AlleleMatrix list whose genotypes can be accessed sequentially...
gsc_GroupNum alloctogroup
gsc_AlleleMatrix * firstAM
gsc_AlleleMatrix * localAM
A structure to iterate forwards through all positions in the gsc_AlleleMatrix linked list in gsc_SimD...
unsigned int cursorAMIndex
A type that contains choices of settings for gsc_SimData functions that create a new gsc_AlleleMatrix...
_Bool will_allocate_ids
A boolean: whether to allocate generated offspring session- unique IDs.
_Bool will_track_pedigree
A boolean: whether to track parentage of generated offspring.
_Bool will_name_offspring
A boolean: whether generated offspring should be given names.
const char * filename_prefix
A string used in save-as-you-go file names.
const char * offspring_name_prefix
If will_name_offspring is true, generated offspring are named with the concatenation {offspring_name_...
gsc_EffectID will_save_bvs_to_file
If equal to NO_EFFECTSET, no bvs are calculated or saved.
_Bool will_save_pedigree_to_file
A boolean.
unsigned int family_size
The number of offspring to produce from each cross.
_Bool will_save_to_simdata
A boolean.
_Bool will_save_alleles_to_file
A boolean.
A (gsc_AlleleMatrix, local index) coordinate of a particular genotype in the simulation.
gsc_AlleleMatrix * localAM
Pointer to the gsc_AlleleMatrix in which the genotype can be found.
unsigned int localPos
Index in the localAM where the genotype can be found (min value: 0).
A type representing the identifier of a group of genotypes.
A type that stores the genome structure used in simulation.
char ** marker_names
A vector of n_markers strings containing the names of markers, ordered according to their index in an...
gsc_RecombinationMap * maps
A vector of n_maps recombination maps, to use for simulating meiosis.
char *** names_alphabetical
A vector of n_markers pointers to names in marker_names, ordered in alphabetical order of the names.
unsigned int n_markers
The total number of markers.
unsigned int n_maps
The number of recombination maps currently stored.
gsc_MapID * map_ids
A vector of n_maps identifiers for each of the recombination maps currently stored.
A type representing a particular custom label.
A generic store for a linkage group, used to simulate meiosis on a certain subset of markers.
gsc_ReorderedLinkageGroup reorder
enum gsc_LinkageGroup::gsc_LinkageGroupType type
gsc_SimpleLinkageGroup simple
union gsc_LinkageGroup::@6 map
A type representing a particular loaded recombination map.
Unprocessed data for one marker (linkage group and position) loaded from a map file.
A struct used to store a set of blocks of markers.
unsigned int num_blocks
The number of blocks whose details are stored here.
unsigned int * num_markers_in_block
Pointer to a heap array of length num_blocks containing the number of markers that make up each block...
unsigned int ** markers_in_block
Pointer to a heap array of length num_blocks, each entry in which is a pointer to a heap array with l...
Simple crate that stores a GroupNum, a MapID, and an EffectID.
gsc_GenoLocation loc
Location in the simulation where this parent is stored.
unsigned int mapindex
Index in d->genome.maps of the recombination map to use when producing gametes from this parent.
A type representing a program-lifetime-unique identifier for a genotype, to be used in tracking pedig...
A structure to search and cache indexes of all genotypes in a gsc_SimData or of all the members of a ...
unsigned int cacheSize
Length in gsc_GenoLocations of cache
const gsc_GroupNum group
Group through which to iterate.
unsigned int largestCached
Local/group index (that is, index in cache) of the highest cell in cache that has been filled.
gsc_SimData * d
Simulation data through which to iterate.
unsigned int groupSize
If the number of genotypes in the simulation that fulfil the iterator's group criteria is known,...
gsc_GenoLocation * cache
Array iteratively updated with the known genotypes in the simulation that fulfil the group criteria o...
A type that stores linkage groups and crossover probabilities for simulating meiosis.
size_t n_chr
The number of chromosomes/linkage groups represented in the map.
gsc_LinkageGroup * chrs
Vector of n_chr recombination maps, one for each chromosome/linkage group in this recombination map.
unsigned int n_markers
The number of markers in this chromosome/linkage group.
double expected_n_crossovers
Expected value of the Poisson distribution from which the number of crossovers in this linkage group ...
unsigned int * marker_indexes
Array with n_markers entries.
double * dists
Array with n_markers entries, containing at position i the distance in centimorgans along the linkage...
Composite type that is used to run crossing simulations.
unsigned int n_labels
The number of custom labels in the simulation.
gsc_KnownGenome genome
A gsc_KnownGenome, which stores the information of known markers and linkage groups,...
gsc_LabelID * label_ids
The identifier number of each label in the simulation, in order of their lookup index.
gsc_EffectID * eff_set_ids
The identifier number of each set of allele effects in the simulation, ordered by their lookup index.
gsc_EffectMatrix * e
Array of n_eff_sets gsc_EffectMatrix, optional for the use of the simulation.
int * label_defaults
Array containing the default (birth) value of each custom label.
unsigned int n_groups
Number of groups currently existing in simulation.
unsigned int n_eff_sets
The number of sets of allele effects in the simulation.
gsc_PedigreeID current_id
Highest SimData-unique ID that has been generated so far.
rnd_pcg_t rng
Random number generator working memory.
gsc_AlleleMatrix * m
Pointer to a gsc_AlleleMatrix, which stores data and metadata of founders and simulated offspring.
unsigned int first_marker_index
The index of the first marker in this chromosome/linkage group in the simulation's corresponding gsc_...
unsigned int n_markers
The number of markers in this chromosome/linkage group.
double * dists
Array with n_markers entries, containing at position i the distance in centimorgans along the linkage...
double expected_n_crossovers
Expected value of the Poisson distribution from which the number of crossovers in this linkage group ...
Represent a cell read by a gsc_TableFileReader.
int predCol
since last read, how many column gaps have there been?
char * cell
deep copy of the cell contents, or NULL
_Bool isCellShallow
is the string in 'cell' a shallow copy or deep copy?
int predNewline
since last read, how many newlines have there been?
_Bool eof
are we (this cell) at end of file
size_t cell_len
length of cell contents (because a shallow copy may not be null-terminated)
Stream reader for files of some tabular format.
int buf_fill
Number of characters from the file that are currently loaded in buf.
char buf[8192]
A window of characters from the file, loaded into memory for current processing.
int cursor
Index in buf of the first character that the file reader has not yet parsed.
FILE * fp
File being read.
struct gsc_datastore_make_genotypes::@5 clones
unsigned int n_gens_selfing
struct gsc_datastore_make_genotypes::@3 selfing
unsigned int bad_pairings
struct gsc_datastore_make_genotypes::@1 rand_btwn
struct gsc_datastore_make_genotypes::@0 rand
unsigned int * first_parents
unsigned int * second_parents
struct gsc_datastore_make_genotypes::@2 targeted
struct gsc_datastore_make_genotypes::@4 doub_haps