Usage examples

The following script is a test file that we use to verify the various functionalities of bio.

The file lists the various usage modes of bio and is the most up-to-date demonstration of its usage.

#
# This script is used to generate Python tests.
#
# The output generated by each test can be seen at:
#
# https://github.com/ialbert/bio/tree/master/test/data
#

# Shell options: -u treats unset variables as errors, -e stops on the first
# failing command, -x prints each command before it is run.
set -uex

# Delete the ncov and ratg13 data if they exist.
bio data --delete ncov,ratg13

# Fetch the accession, rename the data and change the sequence id.
bio fetch NC_045512 --rename ncov --seqid ncov

# Get the RaTG13 data, again renaming the data and the sequence id.
bio fetch MN996532 --rename ratg13 --seqid ratg13

# Show the internal JSON format of the data.
bio convert ncov --json > ncov.json

# Convert to GenBank.
bio convert ncov --genbank > ncov.gb

# Convert to FASTA.
bio convert ncov --fasta > ncov.fa

# Convert to GFF.
bio convert ncov --gff > ncov.gff

# Convert to GFF, selecting with a regular expression match.
bio convert ncov --gff --match phosphoesterase  > match.gff

# Convert features associated with a gene to GFF.
bio convert ncov --gff --gene S > gene.gff

# Convert to GFF the features that overlap the start-to-end interval.
bio convert ncov --gff  --start 10000 --end 20000 > overlap.gff

# Numbers may contain commas. Writes overlap.gff again, replacing the file above.
bio convert ncov --gff  --start 10,000 --end 20,000 > overlap.gff

# Numbers may carry a unit such as kb. Also rewrites overlap.gff.
bio convert ncov --gff  --start 10kb --end 20kb > overlap.gff

# Convert to GFF by type.
bio convert ncov --gff  --type CDS > cds.gff

# Convert to GFF by multiple types (comma separated).
bio convert ncov --gff  --type gene,CDS,mRNA > manytypes.gff

# Slice a FASTA to a region and change the sequence id.
bio convert ncov --fasta --seqid foo --start 10 --end 20 > start.fa

# Convert to FASTA features with a certain type.
# The flag is spelled --end (two dashes), as in every other command here.
bio convert ncov --fasta --type CDS --end 10 > cds.fa

# Convert to FASTA a sub region of type filtered data.
bio convert ncov --fasta --type gene --end 10 > start-gene.fa

# Translate the DNA for features that have the type CDS.
bio convert ncov --fasta --translate --type CDS --end 10 > translates.fa

# Extract already translated proteins from the data.
# The translation attribute must be filled in GenBank.
# NOTE(review): the negative --start presumably counts from the end of the
# sequence (the output is named protein-end.fa) -- confirm.
bio convert ncov --fasta --protein --start -10 > protein-end.fa

# Coding sequences for a gene.
bio convert ncov --fasta --type CDS --gene S --end 10 > cds-gene.fa

# Shortcut: all CDS that are labeled with gene=S. Rewrites cds-gene.fa.
bio convert ncov:S --fasta --end 10 >  cds-gene.fa

# Another shortcut, this time we access coding sequences by the id.
# Rewrites cds-gene.fa once more.
bio convert ncov --id YP_009724390.1 --fasta --end 10 >  cds-gene.fa

# Extract the already translated protein from the data.
bio convert ncov:S --fasta --protein --seqid foo > cds-prot.fa

# Interactive mode (-i). The data is taken from the command line parameter.
bio convert ATGGGC -i --fasta > inter.fa

# Translate in interactive mode.
bio convert ATGGGC -i --translate --seqid foo >  inter-trans.fa

# Translate on the reverse complement.
bio convert ATGGGC -i --revcomp --translate --seqid foo > inter-revcomp1.fa

# Reverse and complement may also be requested as separate flags.
bio convert ATGGGC -i --reverse --complement --translate --seqid foo >  inter-revcomp2.fa

# Align the start of both genomes; --end 180 sets the cutoff.
bio align ncov ratg13 --end 180 > align-dna.txt

# Align the DNA for the coding sequences of the S protein.
bio align ncov:S ratg13:S --end 180 > align-gene.txt

# Align the translated DNA for the coding sequences of the S protein.
bio align ncov:S ratg13:S --end 180 --translate > align-translation.txt

# Generate a one letter peptide trace above the DNA (-1).
bio align ratg13:S ncov:S  --end 180 -1 > align-gene-pept1.txt

# Generate a three letter peptide trace above the DNA (-3).
bio align ratg13:S ncov:S  --end 180 -3 > align-gene-pept3.txt

# Align the already translated proteins.
bio align ncov:S ratg13:S --protein --end 60 > align-protein.txt

# Local alignment in interactive mode.
bio align THISLINE ISALIGNED  -i --local > align-local.txt

# Global alignment in interactive mode.
bio align THISLINE ISALIGNED -i --global > align-global.txt

# Semiglobal alignment in interactive mode.
bio align THISLINE ISALIGNED -i --semiglobal > align-semiglobal.txt

# Check the taxonomy defaults.
bio taxon 9606 > taxon_9606.txt

# Generate the lineage.
bio taxon 9606 --lineage > taxon_9606_lineage.txt

# The lineage may be flattened to a single line.
bio taxon 9606 --lineage --flat > taxon_9606_flat_lineage.txt

# Taxonomy information from data.
bio taxon  ncov ratg13 > taxon_data.txt

# Remove the old ebola data if it exists.
bio data --delete ebola

# Fetch and rename the ebola data.
bio fetch KM233118 --rename ebola

# Get the links to the data.
bio runinfo ebola > sra-test.txt