## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
options(rmarkdown.html_vignette.check_title = FALSE)

## -----------------------------------------------------------------------------
#loading packages we will need today:
library(ape)
library(evolved)

## -----------------------------------------------------------------------------
data("cytOxidase")

## ---- results = F-------------------------------------------------------------
cytOxidase["human"]

## -----------------------------------------------------------------------------
length(cytOxidase)

## -----------------------------------------------------------------------------
names(cytOxidase)

## -----------------------------------------------------------------------------
unlist(lapply(cytOxidase, nchar))

## -----------------------------------------------------------------------------
cytOxidase["platypus"]

## -----------------------------------------------------------------------------
countSeqDiffs(cytOxidase, "human", "alligator")  

## -----------------------------------------------------------------------------
nchar(cytOxidase["platypus"])

## -----------------------------------------------------------------------------
seq_diff_vec <- c(4, 8, 3, 6, 0) # note that these numbers are completely made up
names(seq_diff_vec) <- c("snake-chimp", "human-echinoderm", "alligator-insect", 
                         "chimp-echinoderm", "human-chimp")

seq_diff_vec

## -----------------------------------------------------------------------------
seq_diff_vec["snake-chimp"]

## -----------------------------------------------------------------------------
m <- matrix(data = NA,  # placeholder value for each matrix's cell will be NA 
                   # (later will be replaced by real data)
       nrow = 3,  # one row for each taxon group
       ncol = 3,  # one col for each taxon group
       dimnames = list(c("snake", "human", "insect"), # these are the row names
                       c("snake", "human", "insect")) # these are the column names
)

## -----------------------------------------------------------------------------
m

## -----------------------------------------------------------------------------
m[1,] <- c(0, 5, 2) # replacing the entire first row with fake numbers
m[2,] <- c(5, 0, 80) # replacing the entire second row with fake numbers
m[3,] <- c(2, 80, 0) # repalcing the entire third row with fake numbers
m

## ---- echo=FALSE--------------------------------------------------------------
animal_phy <- ape::read.tree(text=paste0("(cnidaria:1,((((bryozoa:1,(mollusk:1,annelid:1):1):1,(crustacea:1,insect:1):1):1,(echinoderm:1,(lamprey:1,(shark:1,(fish:1,(frog:1,((platypus:1,(human:1,chimpanzee:1):1):1,(snake:1,(alligator:1,bird:1):1):1):1):1):1):1):1):1):1):1);"))

plot.phylo(animal_phy)

# We can also add node labels to facilitate our discussion:
ape::nodelabels(cex=.65, bg = "lightgreen")

## -----------------------------------------------------------------------------
# For instance, what is the probability of observing 10 substitutions in a 37-long 
# AA sequence after 10 million years of divergence
# if the substitution rate is equal to 0.1 substitutions
# per site per million years?

rate_A <- 0.1
AA_seq_length <- 37

obs_subs <- 10  # amount of substitutions that have occurred
obs_time <- 10  # amount of time that has passed

prob_given_rate_A <- dpois(
  x = obs_subs, lambda = obs_time * rate_A * AA_seq_length)

prob_given_rate_A # this is the likelihood

# finally, we take the (natural) log, to get
# the log-likelihood of observing the data
# given the model and the rate value:
log(prob_given_rate_A)

# the questions is, then: what is the likelihood of observing >the same< data 
# under different values of substitution rate?