Download and install MobaXterm: https://mobaxterm.mobatek.net/
Ensure the X11 Forwarding box is checked (it should be by default).
Use the -X flag when you log in, for example: ssh -X username@hostname
#!/usr/bin/python3
from Bio.Seq import Seq #imports the sequence class
#if this import gives an error, install biopython
#make a Seq object from a plain DNA string
myseq = Seq("AGTACAGTGGT")
#a bare name only echoes its repr in an interactive session; in a script this line prints nothing
myseq
## Seq('AGTACAGTGGT')
## AGTACAGTGGT
## ['__add__', '__class__', '__contains__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__imul__', '__init__', '__init_subclass__', '__le__', '__len__', '__lt__', '__module__', '__mul__', '__ne__', '__new__', '__radd__', '__reduce__', '__reduce_ex__', '__repr__', '__rmul__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_data', 'back_transcribe', 'complement', 'complement_rna', 'count', 'count_overlap', 'encode', 'endswith', 'find', 'index', 'join', 'lower', 'lstrip', 'reverse_complement', 'reverse_complement_rna', 'rfind', 'rindex', 'rsplit', 'rstrip', 'split', 'startswith', 'strip', 'tomutable', 'transcribe', 'translate', 'ungap', 'upper']
## ACCACTGTACT
## Seq('STV')
##
## /homes/hwheeler/.local/lib/python3.8/site-packages/Bio/Seq.py:2334: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future.
## warnings.warn(
## 11
## Seq('STV')
#tally the G and C bases of myseq (one count() call per letter)
gc_count = sum(myseq.count(base) for base in ("G", "C"))
#GC content = share of G/C bases in the whole sequence, as a percentage
gc_content = (gc_count / len(myseq)) * 100
print(gc_content)
## 45.45454545454545
def gc(sequence):
    """Return the GC content of *sequence* as a percentage (0-100).

    Works on any object that supports ``count()`` and ``len()``, such as
    a ``str`` or a Biopython ``Seq``. Both upper- and lower-case G/C are
    counted, so lower-case sequence data is not silently ignored.

    An empty sequence returns 0.0 instead of raising ZeroDivisionError.
    """
    length = len(sequence)
    if length == 0:
        #nothing to count; avoid dividing by zero
        return 0.0
    gc_count = sum(sequence.count(base) for base in "GCgc")
    gc_content = gc_count/length * 100
    return gc_content
#call gc() on the Seq object; the return value is only displayed in an interactive session
gc(myseq)
## 45.45454545454545
from Bio import SeqIO
#use Biopython to parse FASTA files!
#need the SeqIO class
mydir = '/homes/data/'
#open the FASTA file in a with-block so the handle is closed once parsing is done
with open(mydir + "example.fasta") as handle:
    #SeqIO.parse goes through the file one sequence record at a time
    for record in SeqIO.parse(handle, 'fasta'):
        #for each record: its identifier, its sequence, and its GC content
        print(record.id)
        print(record.seq)
        print(gc(record.seq))
## Rosalind_6404
## CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCCTCCCACTAATAATTCTGAGG
## 53.75
## Rosalind_5959
## CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCTATATCCATTTGTCAGCAGACACGC
## 53.57142857142857
## Rosalind_0808
## CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGACTGGGAACCTGCGGGCAGTAGGTGGAAT
## 60.91954022988506
## SeqRecord(seq=Seq('CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGC...AAT'), id='Rosalind_0808', name='Rosalind_0808', description='Rosalind_0808', dbxrefs=[])
#if you want to keep all of the sequences in the file, put them in a list
#list() must consume the parser inside the with-block, before the file is closed
with open(mydir + "example.fasta") as handle:
    sequences = list(SeqIO.parse(handle, "fasta"))
#a bare name only echoes the list in an interactive session
sequences
## [SeqRecord(seq=Seq('CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGC...AGG'), id='Rosalind_6404', name='Rosalind_6404', description='Rosalind_6404', dbxrefs=[]), SeqRecord(seq=Seq('CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGA...CGC'), id='Rosalind_5959', name='Rosalind_5959', description='Rosalind_5959', dbxrefs=[]), SeqRecord(seq=Seq('CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGC...AAT'), id='Rosalind_0808', name='Rosalind_0808', description='Rosalind_0808', dbxrefs=[])]
## Seq('CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGC...AGG')
## 'CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCCTCCCACTAATAATTCTGAGG'