📦 Monty / DNA_Match_Manager_Helper

📄 getFieldsFromDNA.awk · 152 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# Grab appropriate fields from DNA_Match_Manager output
BEGIN {
    # Conversion ratio between cMs and percent shared:
    # cMs = pct * factor, pct = cMs / factor.
    cMs_pct_factor = 74.6

    # Matches sharing fewer than cMs_min cMs are dropped. The default of 99.5
    # (so that values rounding up to 100 are kept) applies only when cMs_min
    # has not already been given a value before this rule runs.
    if (length(cMs_min) == 0) cMs_min = 99.5

    # Input records are comma-separated.
    FS = ","
}

/^Source Site/ {
    # Header row: remember the field number of every column that the later
    # rules read, then emit the tab-separated output header.
    ### To add a site specific column, add a branch to the chain below. The file awkStatements.txt will help.
    for (i = 1; i <= NF; i++) {
        title = $i
        if (title == "Source Site") Source = i
        else if (title == "Match Name") Name = i
        else if (title == "Email") Email = i
        else if (title == "Match Gender") Gender = i
        else if (title == "an_Shared cM") an_cMs = i
        else if (title == "an_Shared Segments") an_segs = i
        else if (title == "an_Group Name") an_relationship = i
        else if (title == "gm_Generation") gm_relationship = i
        else if (title == "gm_Largest Segment") gm_longest_cM = i
        else if (title == "gm_Total cM") gm_cMs = i
        else if (title == "ft_Total cM") ft_cMs = i
        else if (title == "ft_Suggested Relationship") ft_relationship = i
        else if (title == "ft_Longest Centimorgans") ft_longest_cM = i
        else if (title == "23_Set Relationship") tw_relationship = i
        else if (title == "23_Percent DNA Shared") tw_pct = i
        else if (title == "23_# Segments Shared") tw_segs = i
        else if (title == "mh_Estimated Relationship") mh_relationship = i
        else if (title == "mh_Total cM Shared") mh_cMs = i
        else if (title == "mh_Percent DNA Shared") mh_pct = i
        else if (title == "mh_Number Shared Segments") mh_segs = i
        else if (title == "mh_Largest Segment cM") mh_longest_cM = i
    }

    ### To add a column change the following line.
    print "Source Site\tMatch Name\tEmail\tSide\tSex\tShared %\tShared cMs\tShared Segs\tLongest Seg\tRelationship"

    # The header itself is not a data row; skip the remaining rules.
    next
}

### To add a site specific column, change each of the following clauses.
/^23 & Me/ {
    # Only the percent-shared column is read for this site, so the cM
    # threshold is applied to the percentage converted to cMs.
    if (cMs_min > $tw_pct * cMs_pct_factor) next

    relationship = $tw_relationship
    segs = $tw_segs
    pct = $tw_pct
    cMs = cMs_pct_factor * $tw_pct
    # No longest-segment or side value is taken from the input here.
    longest_cM = ""
    side = ""
}

/^Ancestry/ {
    # Drop matches whose shared cMs fall below the threshold.
    if (cMs_min > $an_cMs) next

    relationship = $an_relationship
    segs = $an_segs
    cMs = $an_cMs
    # Percent shared is derived from the shared cMs.
    pct = $an_cMs / cMs_pct_factor
    # No longest-segment or side value is taken from the input here.
    longest_cM = ""
    side = ""
}

/^FTDNA/ {
    # Drop matches whose total cMs fall below the threshold.
    if (cMs_min > $ft_cMs) next

    relationship = $ft_relationship
    longest_cM = $ft_longest_cM
    cMs = $ft_cMs
    # Percent shared is derived from the total cMs.
    pct = $ft_cMs / cMs_pct_factor
    # No segment count or side value is taken from the input here.
    segs = ""
    side = ""
}

/^GEDMatch/ {
    # Drop matches whose total cMs fall below the threshold.
    if (cMs_min > $gm_cMs) next

    relationship = $gm_relationship
    # The largest segment is reported as a whole number (truncated by %d).
    longest_cM = sprintf("%d", $gm_longest_cM)
    cMs = $gm_cMs
    # Percent shared is derived from the total cMs.
    pct = $gm_cMs / cMs_pct_factor
    # No segment count or side value is taken from the input here.
    segs = ""
    side = ""
}

/^My Heritage/ {
    # Drop matches whose total shared cMs fall below the threshold.
    if (cMs_min > $mh_cMs) next

    relationship = $mh_relationship
    # The largest segment is reported as a whole number (truncated by %d).
    longest_cM = sprintf("%d", $mh_longest_cM)
    segs = $mh_segs
    cMs = $mh_cMs
    # This site supplies its own percent column, so no conversion is needed.
    pct = $mh_pct
    # No side value is taken from the input here.
    side = ""
}

{
    # Standardize and print one row of data.
    #
    # This rule runs for every record that was not consumed by `next` in an
    # earlier rule. side, pct, cMs, segs, longest_cM and relationship are
    # assigned only by the site rules, so a record that belongs to none of
    # those sites (a blank line, for example) would be printed with the values
    # left over from the previous record. Skip such records instead.
    ### To add a site, add its prefix to the following pattern.
    if ($0 !~ /^(23 & Me|Ancestry|FTDNA|GEDMatch|My Heritage)/) next

    # Work on a copy of the name: assigning to a field would make awk rebuild
    # the whole record.
    name = $Name
    # Sometimes, FTDNA puts extra spaces between names. Collapse every run of
    # two or more spaces to a single space.
    gsub(/  +/, " ", name)

    # Abbreviate the gender. "Female" must be handled before "Male".
    sex = $Gender
    sub(/Female/, "F", sex)
    sub(/Male/, "M", sex)
    # NOTE(review): unanchored, so the first "U" of any remaining value is
    # removed; presumably the input uses a bare "U" for unknown -- confirm.
    sub(/U/, "", sex)

    # Replace upper-case relationship codes with readable text.
    sub(/PARENT_CHILD/, "Parent or child", relationship)
    sub(/CLOSE_FAMILY/, "Close family", relationship)
    sub(/FIRST_COUSIN/, "1st cousin", relationship)
    sub(/SECOND_COUSIN/, "2nd cousin", relationship)
    sub(/THIRD_COUSIN/, "3rd cousin", relationship)
    # Make the wording of the remaining relationship labels uniform.
    gsub(/Cousin/, "cousin", relationship)
    gsub(/; Once Removed/, " - once removed", relationship)
    gsub(/; Twice Removed/, " - twice removed", relationship)

    ### To add a column change the following 2 comment lines. Ditto in makeDNASpreadsheet.sh
    #      1      2      3     4      5      6    7    8      9           10
    #    Source  Name  Email  Side  Gender  Pct  cMs  Segs  Longest  Relationship
    ### To add a column change the following statement.
    printf("%s\t%s\t%s\t%s\t%s\t%.2f\t%.0f\t%s\t%s\t%s\n",
        $Source,
        name,
        $Email,
        side,
        sex,
        pct,
        cMs,
        segs,
        longest_cM,
        relationship)
}