# Start from the home directory and create the tutorial workspace.
cd
# The directory name must match the `cd` below; -p keeps this step from
# failing when the directory is left over from an earlier attempt.
mkdir -p NGS_Tutorial
cd NGS_Tutorial
# Download the tutorial notebook into the workspace.
wget https://bengtssonpalme.github.io/MPBIO-BBT045-2024/seq_tech/seq_tech_tutorial_py.ipynb
###
# optionally, if you want the images to show up properly
cp -r /cephyr/NOBACKUP/groups/bbt045_2024/data/img/ .

# Create the output directory for Exercise 1 (terminal form).
# -p creates the parent `results/` if it is missing and succeeds when the
# directory already exists, so this step can be repeated safely.
mkdir -p results/exercise1

# run bash commands from Jupyter Notebook with an ! at the start of the line
! mkdir -p results/exercise1

# Convert the reference (data/ref/varicella.gb) to FASTA
# NOTE(review): this step was labelled "GFF3 > FASTA", but the input file is
# varicella.gb — confirm the intended description of the input format.
! seqret -sequence data/ref/varicella.gb -feature -fformat gff3 -osformat fasta data/ref/varicella.fasta

# This file is output by seqret in the current directory
# So we move it where it belongs
! mv varicella.gff data/ref/

# Document how the FASTA file was created
# ('>' creates or truncates README.txt, so no separate `touch` is needed;
# '>>' appends the exact command below the description line)
! echo "varicella.fasta converted from GFF as:" > data/ref/README.txt
! echo "seqret -sequence data/ref/varicella.gb -feature -fformat gff3 -osformat fasta data/ref/varicella.fasta" >> data/ref/README.txt

# Save index files in own directory
# (-p: do not fail when the directory already exists, e.g. when re-running the cell)
! mkdir -p results/exercise1/bowtie_index
# Build the bowtie2 index from the FASTA reference (-f); index files share the
# prefix results/exercise1/bowtie_index/varicella
! bowtie2-build -f data/ref/varicella.fasta results/exercise1/bowtie_index/varicella

# Align the two read files (-1 / -2) against the index (-x) and write the
# alignment in SAM format to the path given by -S
! mkdir -p results/exercise1/alignment
! bowtie2 -x results/exercise1/bowtie_index/varicella -1 data/seq/varicella1.fastq -2 data/seq/varicella2.fastq -S results/exercise1/alignment/varicella.sam

# Convert the SAM alignment to BAM (-b) and write it to the path given by -o
! samtools view -b -S -o results/exercise1/alignment/varicella.bam results/exercise1/alignment/varicella.sam

# Sort the BAM file; the sorted copy is written to the -o path
! samtools sort results/exercise1/alignment/varicella.bam -o results/exercise1/alignment/varicella.sorted.bam

# Index the sorted BAM file
! samtools index results/exercise1/alignment/varicella.sorted.bam

# Average coverage: awk sums the third column of the `samtools depth` output
# and divides the total by a hard-coded 124884
# NOTE(review): 124884 is presumably the length of the reference genome — confirm
! samtools depth results/exercise1/alignment/varicella.sorted.bam | awk '{sum+=$3} END {print "Average = ", sum/124884}'

# Save the `samtools depth` table to coverage.tsv (read by the plotting cell below)
! samtools depth results/exercise1/alignment/varicella.sorted.bam > results/exercise1/alignment/coverage.tsv

# load necessary modules
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings # module to manage warnings
warnings.simplefilter(action='ignore', category=FutureWarning) # prevent Python from warning of future feature deprecation

# Read the tab-separated depth table into a DataFrame. `names=` supplies the
# column labels, so the first line of the file is treated as data.
alignment_coverage_df = pd.read_csv(
    'results/exercise1/alignment/coverage.tsv',
    sep='\t',
    names=["reference_name", "position", "coverage_depth"],
)

# Histogram of the coverage_depth column
alignment_coverage_plot = sns.histplot(x="coverage_depth", data=alignment_coverage_df)

# Title and axis labels, applied in a single call
alignment_coverage_plot.set(
    title='Alignment Coverage',
    ylabel='Counts',
    xlabel='Alignment coverage depth',
)

# Show the plot (last expression of the cell)
alignment_coverage_plot

%%bash
# Exercise 2, step 1: build a bowtie2 index for the reference, align the
# varicella_mut read files against it, then convert, sort and index the result.

# Abort on the first failing command, unset variable or failed pipeline stage,
# so later steps never run against missing or partial files.
set -euo pipefail

# Input and output locations (relative to the notebook's working directory)
DATA_DIR=data/seq
RESULT_DIR=results/exercise2

REFERENCE_GENOME=data/ref/varicella.fasta
READS_1="$DATA_DIR/varicella_mut1.fastq"
READS_2="$DATA_DIR/varicella_mut2.fastq"

BOWTIE_INDEX_DIR="$RESULT_DIR/bowtie_index"
ALIGNMENT_DIR="$RESULT_DIR/alignment"

# Make all directories (-p: create parents, and do not fail when re-running the cell)
mkdir -p "$RESULT_DIR" "$BOWTIE_INDEX_DIR" "$ALIGNMENT_DIR"

# Build the bowtie2 index
bowtie2-build -f "$REFERENCE_GENOME" "$BOWTIE_INDEX_DIR/varicella"

# Align the two read files against the index; SAM output goes to the -S path
bowtie2 -x "$BOWTIE_INDEX_DIR/varicella" -1 "$READS_1" -2 "$READS_2" -S "$ALIGNMENT_DIR/varicella_mut.sam"

# Convert SAM to BAM
samtools view -b -S -o "$ALIGNMENT_DIR/varicella_mut.bam" "$ALIGNMENT_DIR/varicella_mut.sam"

# Sort the BAM file
samtools sort "$ALIGNMENT_DIR/varicella_mut.bam" -o "$ALIGNMENT_DIR/varicella_mut.sorted.bam"

# Index the sorted BAM file
samtools index "$ALIGNMENT_DIR/varicella_mut.sorted.bam"

# Pile up the sorted alignment against the reference (-f) and redirect the
# output (-O u) into a .bcf file
! bcftools mpileup -f data/ref/varicella.fasta results/exercise2/alignment/varicella_mut.sorted.bam -O u > results/exercise2/varicella_variants.bcf

# Call variants from the pileup and redirect the result into a .vcf file
! bcftools call -c -v results/exercise2/varicella_variants.bcf > results/exercise2/varicella_variants.vcf

# Open the alignment viewer at the position given by -p.
# This command is interactive - run it in the terminal, not in a notebook cell
samtools tview results/exercise2/alignment/varicella_mut.sorted.bam data/ref/varicella.fasta -p NC_001348:77985

# note that this command is also interactive - run it in the terminal
# (grep -v "^#" drops the lines starting with '#', column -t aligns the
# remaining columns, less -S pages the output without wrapping long lines)
grep -v "^#"  results/exercise2/varicella_variants.vcf | column -t | less -S

# Print the pileup for the single position 73233 of NC_001348 (-r restricts the region)
! samtools mpileup -r NC_001348:73233-73233 -f data/ref/varicella.fasta results/exercise2/alignment/varicella_mut.sorted.bam

# Output directory for Exercise 3
# (-p: do not fail when the directory already exists, e.g. when re-running the cell)
! mkdir -p results/exercise3

# Run breseq on every file matching data/seq/varicella_mut*.fastq, using
# data/ref/varicella.gb as the reference (-r) and writing to the -o directory
! breseq -j 1 -o results/exercise3 -r data/ref/varicella.gb data/seq/varicella_mut*.fastq

# Output directory for Exercise 4
# (-p: do not fail when the directory already exists, e.g. when re-running the cell)
! mkdir -p results/exercise4

# Assemble the two read files with abyss-pe, working in results/exercise4.
# NOTE(review): the in= paths start with ../../, presumably because abyss-pe
# resolves them from inside --directory (two levels below the project root) — confirm
! abyss-pe name=varicella k=128 B=8K --directory=results/exercise4 in='../../data/seq/varicella_l1.fastq ../../data/seq/varicella_l2.fastq'

BBT045: Sequencing Technologies Tutorial¶

Tutorial Overview¶

Initial setup: Work environment and Data¶

Directory and data setup¶

Work environment¶

Background¶

Setting up our environment¶

Installing relevant software¶

Running conda in Jupyter Notebooks¶

Exercise 1: Alignment¶

Protocol¶

Step 1: Init¶

Step 2: Preprocess¶

Step 3: Align sequences to reference¶

Step 4: Convert alignment to binary format¶

Step 5: Optimize alignment (pt 1)¶

Step 6: Optimize alignment (pt 2)¶

Step 7: Calculate alignment coverage¶

Question¶

Exercise 2: Finding Mutations with SAMtools¶

Protocol¶

Step 1: Align sequences to reference¶

Step 2: Identify point mutations¶

Step 3: Inspect mutations (pt 1)¶

Step 4: Inspect mutations (pt 2)¶

Questions¶

Q1¶

Q2¶

Q3¶

Exercise 3: Finding Mutations with breseq¶

Protocol¶

Step 1: Init¶

Step 2: Run¶

Questions¶

Q1¶

Q2¶

Q3¶

Exercise 4: De novo genome assembly¶

Protocol¶

Step 1: Init¶

Step 2: Assembly¶

Questions¶

Q1¶

Q2¶

Q3¶

Running `conda` in Jupyter Notebooks¶