~/DMU/msa_sandpit_2017.3.13> cat pipeline.sh
#!/usr/bin/env bash
# -*- coding: utf-8 -*-
# --------------------------------------------------
# File Name: pipeline.sh
# Purpose: Scrape the Ham_* source texts, encode them as DNA sequences,
#          align the sequences with MUSCLE and decode the alignment
#          back into human-readable text.
# Creation Date: 09-03-2017
# Last Modified: Mon, Mar 13, 2017 2:42:52 AM
# Author(s): Mike Stout
# Copyright 2017 The Author(s) All Rights Reserved
# Credits:
# --------------------------------------------------
# Pull in the user's shell setup.
# NOTE(review): presumably this is what makes the `py` and `rh` commands used
# further down available — confirm.
source ~/.bashrc
# Start from a clean slate. This removes every *.txt and *.seq file in the
# current directory, not only the ones this pipeline generated. -f keeps a
# first run (nothing to delete yet) from printing "No such file" errors.
rm -f -- *.txt *.seq all.fa all.msa
# Download the html files and convert to txt ...
# scrape.sh is run once per edition tag, receiving the tag prefixed with "Ham_".
for edition in Q1 Q1M Q2 Q2M F1 FM; do
  sh scrape.sh "Ham_${edition}"
done
# Convert ASCII/UTF-8 text to DNA codons ...
# Each NAME.txt is passed to encode.py; the result is shown on stdout and also
# saved as NAME.txt.seq. The glob is used directly (instead of parsing `ls`)
# so file names containing whitespace stay in one piece.
for f in *.txt; do
  # An unmatched glob is left as the literal "*.txt"; skip it so no bogus
  # "*.txt.seq" file is created when there is nothing to encode.
  [ -e "$f" ] || continue
  printf '%s\n' "$f"
  py encode.py "$f" | tee "${f}.seq"
done
# Compile the DNA sequences into FASTA format for MSA ...
# Every *.seq file contributes a ">file-name" header line followed by the
# file's contents; the combined stream is written to all.fa.
# Pathname expansion already yields the names in sorted order, so no separate
# `ls | sort` pass is needed, and names containing whitespace stay intact.
for f in *.seq; do
  # An unmatched glob is left as the literal "*.seq"; skip it so all.fa is
  # simply empty when there are no sequences.
  [ -e "$f" ] || continue
  printf '>%s\n' "$f"
  cat -- "$f"
done > all.fa
# Do multiple sequence alignment using Muscle ...
# Reads all.fa and writes the alignment to all.msa; the Windows binary is
# launched through cmd /c.
cmd /c muscle3.8.31_i86win32.exe \
  -maxiters 1 -diags \
  -in all.fa \
  -out all.msa
# Decode the results and format for human readability ...
# decode.hs receives all.msa on stdin (redirected directly, no extra `cat`).
# Its output is then post-processed line by line, in this order:
#   1. a literal backslash-n becomes a space,
#   2. a literal backslash-t becomes a tab,
#   3. every ">" becomes a line break followed by a double quote,
#   4. every double quote becomes a space.
# The result is shown on stdout and also saved as all.msa.text.
# NOTE(review): "\t" and "\n" on the replacement side are GNU sed behaviour,
# as in the original three-sed version — confirm if another sed is ever used.
rh decode.hs < all.msa \
  | sed -e 's/\\n/ /g' -e 's/\\t/\t/g' -e 's/>/\n"/g' \
  | tr '"' ' ' \
  | tee all.msa.text
# Stop here: the script ends once all.msa.text has been written. With no
# argument, exit reports the status of the last command that ran.
# Everything below this line is unreachable until the exit is removed.
exit
# Make Phylogenetic Tree ....
# NOTE(review): this reads all.aligned.fasta, which nothing in the visible
# script writes (the alignment step outputs all.msa) — confirm the intended
# input before re-enabling this stage.
cmd /c muscle3.8.31_i86win32.exe -maketree -in all.aligned.fasta -out seqs.phy
# Render the tree ...
# NOTE(review): presumably toTree.py consumes seqs.phy — verify.
py toTree.py