Usage examples
The following script is a test file that we use to verify the various functionalities of bio.
The file lists the various usage modes of bio and is the most up-to-date demonstration of its usage.
#
# This script is used to generate Python tests.
#
# The output generated by each test can be seen at:
#
# https://github.com/ialbert/bio/tree/master/test/data
#
# Setup: shell options plus the two datasets (ncov, ratg13) that later commands refer to.
#
# -u: treat unset variables as errors; -e: stop at the first failing command;
# -x: print each command before it is executed.
set -uex
# Delete the ncov and ratg13 data if they exist.
bio data --delete ncov,ratg13
# Fetch accession NC_045512, rename the data to ncov and change the sequence id to ncov.
bio fetch NC_045512 --rename ncov --seqid ncov
# Get the RaTG13 data (accession MN996532), renamed to ratg13 with sequence id ratg13.
bio fetch MN996532 --rename ratg13 --seqid ratg13
# Format conversions of the ncov data; each command redirects its output to a file.
#
# Show the internal JSON format of the data.
bio convert ncov --json > ncov.json
# Convert to GenBank.
bio convert ncov --genbank > ncov.gb
# Convert to FASTA.
bio convert ncov --fasta > ncov.fa
# Convert to GFF.
bio convert ncov --gff > ncov.gff
# Convert to GFF, keeping only what matches the regular expression.
bio convert ncov --gff --match phosphoesterase > match.gff
# Convert features associated with a gene to GFF.
bio convert ncov --gff --gene S > gene.gff
# Convert to GFF the features that overlap the start-to-end interval.
bio convert ncov --gff --start 10000 --end 20000 > overlap.gff
# Numbers may have commas.
# Writes to the same overlap.gff as the previous command (the file is overwritten).
bio convert ncov --gff --start 10,000 --end 20,000 > overlap.gff
# Numbers may be expressed with unit suffixes such as kb.
# Again overwrites overlap.gff; presumably all three spellings select the same interval.
bio convert ncov --gff --start 10kb --end 20kb > overlap.gff
# Convert to GFF by type.
bio convert ncov --gff --type CDS > cds.gff
# Convert to GFF by multiple types, given as a comma separated list.
bio convert ncov --gff --type gene,CDS,mRNA > manytypes.gff
# FASTA output restricted by coordinates and/or feature type.
#
# Slice a FASTA to a region and change the sequence id.
bio convert ncov --fasta --seqid foo --start 10 --end 20 > start.fa
# Convert to FASTA the features with a certain type, sliced with --end 10.
bio convert ncov --fasta --type CDS --end 10 > cds.fa
# Convert to FASTA a sub region of type filtered data.
bio convert ncov --fasta --type gene --end 10 > start-gene.fa
# Translate the DNA for features that have the type CDS.
bio convert ncov --fasta --translate --type CDS --end 10 > translates.fa
# Protein extraction and several equivalent ways of selecting the coding sequence of gene S.
#
# Extract already translated proteins from the data.
# The translation attribute must be filled in GenBank.
# NOTE(review): the negative --start presumably counts from the end of the sequence
# (the output file is named protein-end.fa) - confirm.
bio convert ncov --fasta --protein --start -10 > protein-end.fa
# Coding sequences for a gene.
bio convert ncov --fasta --type CDS --gene S --end 10 > cds-gene.fa
# Shortcut 1: all CDS features that are labeled with gene=S.
# Overwrites cds-gene.fa written by the previous command.
bio convert ncov:S --fasta --end 10 > cds-gene.fa
# Another shortcut, this time we access coding sequences by the id.
# Overwrites cds-gene.fa once more.
bio convert ncov --id YP_009724390.1 --fasta --end 10 > cds-gene.fa
# Extract the already translated protein from the data and change the sequence id.
bio convert ncov:S --fasta --protein --seqid foo > cds-prot.fa
# Interactive mode (-i): the data is obtained from the command line parameter
# (here the literal sequence ATGGGC).
bio convert ATGGGC -i --fasta > inter.fa
# Translate in interactive mode.
bio convert ATGGGC -i --translate --seqid foo > inter-trans.fa
# Translate on the reverse complement.
bio convert ATGGGC -i --revcomp --translate --seqid foo > inter-revcomp1.fa
# You can separately reverse and complement.
bio convert ATGGGC -i --reverse --complement --translate --seqid foo > inter-revcomp2.fa
# Alignments between the ncov and ratg13 data, followed by interactive alignments.
#
# Align the first 180 bp across both genomes.
bio align ncov ratg13 --end 180 > align-dna.txt
# Align the DNA for the coding sequences of the S protein.
bio align ncov:S ratg13:S --end 180 > align-gene.txt
# Align the translated DNA for the coding sequences of the S protein.
bio align ncov:S ratg13:S --end 180 --translate > align-translation.txt
# Generate one letter peptide trace above the DNA.
# Note the two inputs are given in the opposite order here (ratg13 first).
bio align ratg13:S ncov:S --end 180 -1 > align-gene-pept1.txt
# Generate three letter peptide trace above the DNA (ratg13 first, as above).
bio align ratg13:S ncov:S --end 180 -3 > align-gene-pept3.txt
# Align the already translated proteins.
# --end 60 presumably counts residues (60 residues = 180 bp of coding DNA) - confirm.
bio align ncov:S ratg13:S --protein --end 60 > align-protein.txt
# Local alignment in interactive mode (-i: sequences are taken from the command line).
bio align THISLINE ISALIGNED -i --local > align-local.txt
# Global alignment in interactive mode.
bio align THISLINE ISALIGNED -i --global > align-global.txt
# Semiglobal alignment in interactive mode.
bio align THISLINE ISALIGNED -i --semiglobal > align-semiglobal.txt
# Taxonomy queries, by numeric taxonomy id and by stored data name.
#
# Check taxonomy defaults for taxonomy id 9606.
bio taxon 9606 > taxon_9606.txt
# Generate lineage.
bio taxon 9606 --lineage > taxon_9606_lineage.txt
# The lineage may be flattened to a single line.
bio taxon 9606 --lineage --flat > taxon_9606_flat_lineage.txt
# Taxonomy information from data (the ncov and ratg13 data fetched earlier in this script).
bio taxon ncov ratg13 > taxon_data.txt
# Run information for a freshly fetched ebola dataset.
#
# Remove the old ebola data if it exists.
bio data --delete ebola
# Fetch accession KM233118 and rename the data to ebola.
bio fetch KM233118 --rename ebola
# Get the links to the data.
# NOTE(review): the output name suggests these are SRA run links - confirm.
bio runinfo ebola > sra-test.txt