If you have nucleotide sequences of protein-coding genes and want to convert them to the corresponding amino acid sequences, you can do so with Python's Biopython module.
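The original FASTA file that stores the nucleotide sequences also contains sequence IDs and other header information. We only need the nucleotide sequences themselves, so we can use a for loop to go through each line and write the even-numbered lines to a new text file. (This assumes every record is exactly one header line followed by one sequence line.)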
# Copy only the sequence lines of a FASTA file into a new, label-free file.
# The input is expected to hold one header line followed by one sequence
# line per record, so the sequences are the even-numbered lines of the file.
# The `with` statement closes both files even if reading or writing fails.
with open('D:/.../PCGs/cytb/cytb.fas', 'r') as file1, \
        open('D:/.../PCGs/cytb/cytb_no_label.fas', 'w') as file2:
    lines = file1.readlines()
    # Even-numbered lines (2nd, 4th, ...) sit at odd 0-based indices,
    # hence the slice starting at index 1 with a step of 2.
    file2.writelines(lines[1::2])
# Translate every nucleotide row of the label-free file into its peptide
# sequence and write one peptide per line to a new file.

# importing the Biopython package
from Bio.Seq import Seq
try:
    # Bio.Alphabet was removed in Biopython 1.78, so this import only
    # succeeds on older releases. It is kept (guarded) for backward
    # compatibility; the translation below no longer needs it.
    from Bio.Alphabet import IUPAC
except ImportError:
    IUPAC = None

# The `with` statement closes both files even if translation fails.
with open('D:/.../PCGs/atp6/atp6_no_label.fas') as file1, \
        open('D:/.../PCGs/atp6/atp6_aa_no_label.fas', 'w') as file2:
    for line in file1:
        # Drop the trailing newline and any spaces inside the row so that
        # only the nucleotide letters remain.
        j = line.strip().replace(" ", "")
        # No alphabet argument: Seq accepts a plain string on both old and
        # new Biopython releases.
        coding_dna = Seq(j)
        pep = coding_dna.translate(table="Invertebrate Mitochondrial")
        pep2 = str(pep)
        print(pep2)
        file2.write(pep2 + "\n")
The peptide (PEP) sequences exported in the previous step contain only the amino acid sequences; the original sequence IDs and other header information are missing. You can use the following script to add them back.
# Put a FASTA label line in front of every peptide sequence of the file,
# rewriting the file in place.

pep_path = 'D:/.../PCGs/nad6/nad6_aa_no_label.fas'

# Labels in the same order as the sequences appear in the file.
labels = ['>td', '>tj', '>to', '>tchi', '>tcae', '>tp', '>ma']

# Read the sequences without their line endings; blank lines are skipped
# so they cannot shift the pairing of labels and sequences.
with open(pep_path, 'r') as file1:
    lines = [line.strip() for line in file1 if line.strip()]

# Check before the file is reopened for writing (which truncates it), so a
# mismatch never destroys the data or silently attaches wrong labels.
if len(lines) != len(labels):
    raise ValueError(
        'expected %d sequences but found %d in %s'
        % (len(labels), len(lines), pep_path))

# Write each record as a label line followed by its sequence line, with
# exactly one newline after each so no blank lines end up in the output.
with open(pep_path, 'w') as file1:
    for label, sequence in zip(labels, lines):
        file1.write(label + '\n' + sequence + '\n')