From ebc1dfba521f94934b433a6caba927ebb7ae2b78 Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Fri, 11 Nov 2016 16:23:08 +0100 Subject: Add Nucleotide Count --- go/nucleotide-count/README.md | 45 +++++++++++ go/nucleotide-count/nucleotide_count_test.go | 116 +++++++++++++++++++++++++++ 2 files changed, 161 insertions(+) create mode 100644 go/nucleotide-count/README.md create mode 100644 go/nucleotide-count/nucleotide_count_test.go diff --git a/go/nucleotide-count/README.md b/go/nucleotide-count/README.md new file mode 100644 index 0000000..e40f5dd --- /dev/null +++ b/go/nucleotide-count/README.md @@ -0,0 +1,45 @@ +# Nucleotide Count + +Given a DNA string, compute how many times each nucleotide occurs in the string. + +DNA is represented by an alphabet of the following symbols: 'A', 'C', +'G', and 'T'. + +Each symbol represents a nucleotide, which is a fancy name for the +particular molecules that happen to make up a large part of DNA. + +Shortest intro to biochemistry EVAR: + +- twigs are to birds nests as +- nucleotides are to DNA and RNA as +- amino acids are to proteins as +- sugar is to starch as +- oh crap lipids + +I'm not going to talk about lipids because they're crazy complex. + +So back to nucleotides. + +DNA contains four types of them: adenine (`A`), cytosine (`C`), guanine +(`G`), and thymine (`T`). + +RNA contains a slightly different set of nucleotides, but we don't care +about that for now. + +To run the tests simply run the command `go test` in the exercise directory. + +If the test suite contains benchmarks, you can run these with the `-bench` +flag: + + go test -bench . + +For more detailed info about the Go track see the [help +page](http://exercism.io/languages/go). 
+ +## Source + +The Calculating DNA Nucleotides problem at Rosalind [http://rosalind.info/problems/dna/](http://rosalind.info/problems/dna/) + +## Submitting Incomplete Problems +It's possible to submit an incomplete solution so you can see how others have completed the exercise. + diff --git a/go/nucleotide-count/nucleotide_count_test.go b/go/nucleotide-count/nucleotide_count_test.go new file mode 100644 index 0000000..9fa3248 --- /dev/null +++ b/go/nucleotide-count/nucleotide_count_test.go @@ -0,0 +1,116 @@ +package dna + +import ( + "reflect" + "testing" +) + +var tallyTests = []struct { + strand DNA + nucleotide byte + expected int +}{ + {"", 'A', 0}, + {"ACT", 'G', 0}, + {"CCCCC", 'C', 5}, + {"GGGGGTAACCCGG", 'T', 1}, +} + +func TestNucleotideCounts(t *testing.T) { + for _, tt := range tallyTests { + if count, err := tt.strand.Count(tt.nucleotide); err != nil { + t.Fatal(err) + } else if count != tt.expected { + t.Fatalf("Got \"%v\", expected \"%v\"", count, tt.expected) + } + } +} + +func TestHasErrorForInvalidNucleotides(t *testing.T) { + dna := DNA("GATTACA") + if _, err := dna.Count('X'); err == nil { + t.Fatalf("X is an invalid nucleotide, but no error was raised") + } +} + +// In most cases, this test is pointless. +// Very occasionally it matters. +// Just roll with it. 
+func TestCountingDoesntChangeCount(t *testing.T) { + dna := DNA("CGATTGGG") + dna.Count('T') + count1, err := dna.Count('T') + if err != nil { + t.Fatal(err) + } + count2, err := dna.Count('T') + if err != nil { + t.Fatal(err) + } + if count1 != count2 || count2 != 2 { + t.Fatalf("Got %v, expected %v", []int{count1, count2}, []int{2, 2}) + } +} + +type histogramTest struct { + strand DNA + expected Histogram + err bool +} + +var histogramTests = []histogramTest{ + { + "", + Histogram{'A': 0, 'C': 0, 'T': 0, 'G': 0}, + false, + }, + { + "GGGGGGGG", + Histogram{'A': 0, 'C': 0, 'T': 0, 'G': 8}, + false, + }, + { + "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC", + Histogram{'A': 20, 'C': 12, 'T': 21, 'G': 17}, + false, + }, + { + "GGXXX", + nil, + true, + }, +} + +func TestSequenceHistograms(t *testing.T) { + for _, tt := range histogramTests { + counts, err := tt.strand.Counts() + if tt.err && err == nil { + t.Fatalf("DNA{ %q }: expected error but didn't get one.", tt.strand) + } else if !tt.err && err != nil { + t.Fatalf("DNA{ %q }: expected no error but got error %s", tt.strand, err.Error()) + } else if !tt.err && !reflect.DeepEqual(counts, tt.expected) { + t.Fatalf("DNA{ %q }: Got %v, expected %v", tt.strand, counts, tt.expected) + } + } +} + +func BenchmarkSequenceHistograms(b *testing.B) { + b.StopTimer() + for _, tt := range histogramTests { + for i := 0; i < b.N; i++ { + b.StartTimer() + + tt.strand.Counts() + + b.StopTimer() + } + } +} + +const targetTestVersion = 2 + +func TestTestVersion(t *testing.T) { + if testVersion != targetTestVersion { + t.Errorf("Found testVersion = %v, want %v.", testVersion, targetTestVersion) + } +} -- cgit v1.2.3