Python code to parse and manipulate DNA and protein sequences
Posted On February 16, 2023
The following code demonstrates how to create and manipulate DNA and protein sequences using Biopython’s Seq and SeqRecord classes: creating a DNA sequence, wrapping it in a SeqRecord object, accessing the sequence, computing the reverse complement, translating the DNA sequence into a protein sequence, and creating a SeqRecord object from the protein sequence.
Code
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
# Create a DNA sequence
dna_sequence = Seq("AGTACACTGGT")
# Wrap the DNA sequence in a SeqRecord object
dna_seq_record = SeqRecord(dna_sequence)
# Access the DNA sequence through the record
print(dna_seq_record.seq)
# Reverse complement the DNA sequence
reverse_complement = dna_sequence.reverse_complement()
# Create a new SeqRecord object from the reverse complement
reverse_complement_seq_record = SeqRecord(reverse_complement)
# Access the reverse complement sequence
print(reverse_complement_seq_record.seq)
# Translate the DNA sequence into a protein sequence.
# The sequence is 11 nucleotides long, so the last two bases do not form a
# complete codon. Translating a length that is not a multiple of three makes
# Biopython emit a "Partial codon" warning (which it says may become an error),
# so the incomplete trailing codon is trimmed explicitly before translating.
coding_length = len(dna_sequence) - len(dna_sequence) % 3
protein_sequence = dna_sequence[:coding_length].translate()
# Create a new SeqRecord object from the protein sequence
protein_seq_record = SeqRecord(protein_sequence)
# Access the protein sequence
print(protein_seq_record.seq)
Output
AGTACACTGGT
ACCAGTGTACT
STL
Unit Test
The following code defines a TestSequenceManipulation class that tests the operations shown above. The tests check the DNA sequence, the DNA SeqRecord object, the reverse complement, and the protein sequence. To run the tests, run the script; unittest will print the results.
import unittest
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
class TestSequenceManipulation(unittest.TestCase):
    """Check basic Seq / SeqRecord operations on the sequence AGTACACTGGT."""

    def setUp(self):
        """Build a fresh DNA sequence and its SeqRecord wrapper for each test."""
        self.dna_sequence = Seq("AGTACACTGGT")
        self.dna_seq_record = SeqRecord(self.dna_sequence)

    def test_dna_sequence(self):
        """The Seq object round-trips to the original string."""
        self.assertEqual(str(self.dna_sequence), "AGTACACTGGT")

    def test_dna_seq_record(self):
        """The SeqRecord exposes the wrapped sequence through ``.seq``."""
        self.assertEqual(str(self.dna_seq_record.seq), "AGTACACTGGT")

    def test_reverse_complement(self):
        """The reverse complement has the same length as the input (11 nt)."""
        reverse_complement = self.dna_sequence.reverse_complement()
        # Reverse of AGTACACTGGT is TGGTCACATGA; complementing each base
        # gives ACCAGTGTACT.
        self.assertEqual(str(reverse_complement), "ACCAGTGTACT")

    def test_protein_sequence(self):
        """The three complete codons AGT, ACA, CTG translate to S, T, L."""
        # Trim the trailing partial codon ("GT") explicitly; translating a
        # length that is not a multiple of three triggers a Biopython
        # "Partial codon" warning that may become an error.
        coding_length = len(self.dna_sequence) - len(self.dna_sequence) % 3
        protein_sequence = self.dna_sequence[:coding_length].translate()
        self.assertEqual(str(protein_sequence), "STL")


if __name__ == '__main__':
    unittest.main()