pipeline.sh

~/DMU/msa_sandpit_2017.3.13> cat pipeline.sh


#!/usr/bin/env bash
# -*- coding: utf-8 -*-

# --------------------------------------------------
# File Name: pipeline.sh
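# Purpose: Scrape the Ham_* source texts, encode them as DNA codons,
#          align them with MUSCLE and decode the alignment into readable text.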
# Creation Date: 09-03-2017
# Last Modified: Mon, Mar 13, 2017  2:42:52 AM
# Author(s): Mike Stout 
# Copyright 2017 The Author(s) All Rights Reserved
# Credits: 
# --------------------------------------------------

# Load the user's shell setup.
# NOTE(review): presumably this is what makes the `py` and `rh` commands used
# further down available -- they are not defined in this file; confirm.
source ~/.bashrc

# Start from a clean slate: remove the outputs of any previous run.
# -f keeps a first run (nothing to delete yet, so the globs stay unmatched)
# from printing errors and returning a failure status; -- stops option
# parsing so a file name starting with '-' is not taken as a flag.
# CAUTION: this deletes every *.txt and *.seq file in the current directory.
rm -f -- *.txt *.seq all.fa all.msa


# Download the html files and convert them to txt, one scrape.sh run per
# source (scrape.sh is not shown here; it receives the Ham_* name as $1) ...
for page in Ham_Q1 Ham_Q1M Ham_Q2 Ham_Q2M Ham_F1 Ham_FM; do
	sh scrape.sh "$page"
done



# Convert ASCII/UTF-8 text to DNA codons ...
#
# encode_texts: run encode.py over every *.txt file in the current directory.
# Globals:   none
# Arguments: none
# Outputs:   for each file, its name followed by the encoder output on
#            stdout; the encoder output is also saved as <file>.seq
encode_texts() {
	local txt
	# Glob directly instead of parsing `ls`, so file names containing
	# whitespace or glob characters are passed through intact.
	for txt in *.txt; do
		# An unmatched glob is left as the literal '*.txt'; skip anything
		# that is not an existing regular file.
		[ -f "$txt" ] || continue
		printf '%s\n' "$txt"
		py encode.py "$txt" | tee "${txt}.seq"
	done
}
encode_texts

# Compile the DNA sequences into FASTA format for MSA ...
#
# compile_fasta: print every *.seq file in the current directory as one FASTA
# record: a ">" header line carrying the file name, then the file's contents.
# Globals:   none
# Arguments: none
# Outputs:   the concatenated FASTA records on stdout
compile_fasta() {
	local seq
	# Glob directly instead of parsing `ls | sort`: pathname expansion already
	# yields the names in sorted order and keeps names with whitespace intact.
	for seq in *.seq; do
		# An unmatched glob is left as the literal '*.seq'; skip anything
		# that is not an existing regular file.
		[ -f "$seq" ] || continue
		printf '>%s\n' "$seq"
		cat -- "$seq"
	done
}
compile_fasta > all.fa


# Do multiple sequence alignment using Muscle: the Windows build is launched
# through cmd.exe, reading all.fa and writing the alignment to all.msa ...
muscle_exe=muscle3.8.31_i86win32.exe
cmd /c "$muscle_exe" \
	-maxiters 1 -diags \
	-in all.fa \
	-out all.msa

# Decode the results and format them for human readability ...
#
# format_alignment: stdin -> stdout filter that tidies the decoder's output:
#   literal "\n" (backslash + n)  ->  space
#   literal "\t" (backslash + t)  ->  real tab character
#   ">"                           ->  line break followed by a space
#   any double quote              ->  space
# The \t and \n escapes in the sed replacement text rely on GNU sed.
# NOTE(review): the patterns suggest decode.hs emits quoted strings with
# escaped \n / \t sequences -- decode.hs is not shown here; confirm.
format_alignment() {
	# One sed process applies the three substitutions in order. The ">" rule
	# inserts a '"' after the line break so that the tr below turns it, like
	# every other double quote, into a space.
	sed -e 's/\\n/ /g' \
		-e 's/\\t/\t/g' \
		-e 's/>/\n"/g' |
		tr '"' ' '
}

# Feed the alignment to the decoder via redirection (no extra `cat`
# process), tidy it, show it on stdout and keep a copy in all.msa.text.
rh decode.hs < all.msa | format_alignment | tee all.msa.text

# Stop here: the script ends at this `exit`, so nothing below it is executed.
exit

# Make phylogenetic tree ... (disabled: unreachable while the `exit` above is in place)
# NOTE(review): this reads all.aligned.fasta, but the alignment step earlier
# in this script writes all.msa and nothing here creates all.aligned.fasta --
# confirm the intended input file before re-enabling.
cmd /c muscle3.8.31_i86win32.exe -maketree -in all.aligned.fasta -out seqs.phy

# Render the tree ... (disabled, see above)
# NOTE(review): presumably toTree.py consumes seqs.phy; it is not shown here -- verify.
py toTree.py