#!/usr/bin/env bash
# -*- coding: utf-8 -*-
# --------------------------------------------------
# File Name: pipeline.sh
# Purpose: Scrape a set of source texts, encode each one as a DNA-codon
#          sequence, collect the sequences into a single FASTA file, run a
#          multiple sequence alignment with Muscle and decode the alignment
#          back into human-readable text.
# Creation Date: 09-03-2017
# Last Modified: Mon, Mar 13, 2017 2:42:52 AM
# Author(s): Mike Stout
# Copyright 2017 The Author(s) All Rights Reserved
# Credits:
# --------------------------------------------------
#
# Listing originally captured with `cat pipeline.sh` in
# ~/DMU/msa_sandpit_2017.3.13.
#
# Inputs  (current directory): scrape.sh, encode.py, decode.hs,
#                              muscle3.8.31_i86win32.exe
# Outputs (current directory): *.txt, *.txt.seq, all.fa, all.msa, all.msa.text
#
# NOTE(review): `py` and `rh` are not defined in this file; they are presumably
# provided by ~/.bashrc or found on PATH. If they are shell aliases they will
# not expand in a non-interactive bash -- confirm before relying on this.

source ~/.bashrc

# Start from a clean slate. -f keeps a first run (nothing to delete yet) quiet.
rm -f -- *.txt *.seq all.fa all.msa

# Download the html files and convert to txt ...
for f in Q1 Q1M Q2 Q2M F1 FM; do
  sh scrape.sh "Ham_${f}"
done

# Convert ASCII / UTF-8 text to DNA codons ...
for f in *.txt; do
  # An unmatched glob stays literal; skip it instead of encoding "*.txt".
  [ -e "$f" ] || continue
  echo "$f"
  py encode.py "$f" | tee "${f}.seq"
done

# Compile the DNA sequences into FASTA format for MSA ...
# Glob expansion is already sorted, so no `ls | sort` is needed.
for f in *.seq; do
  [ -e "$f" ] || continue
  echo ">$f"
  cat -- "$f"
done > all.fa

# Do multiple sequence alignment using Muscle ...
cmd /c muscle3.8.31_i86win32.exe -maxiters 1 -diags -in all.fa -out all.msa

# Decode the results and format for human readability:
#   literal "\n" -> space, literal "\t" -> tab, ">" -> newline + quote,
#   then every double quote -> space.
rh decode.hs < all.msa \
  | sed -e 's/\\n/ /g' -e 's/\\t/\t/g' -e 's/>/\n"/g' \
  | tr '"' ' ' \
  | tee all.msa.text

# The pipeline deliberately stops here; everything below is currently disabled.
exit

# Make Phylogenetic Tree ....
# NOTE(review): this step reads all.aligned.fasta, which nothing above creates
# (the alignment is written to all.msa) -- confirm the input name before
# re-enabling.
cmd /c muscle3.8.31_i86win32.exe -maketree -in all.aligned.fasta -out seqs.phy

# Render the tree ...
py toTree.py