#' Find locations of genomic regions relative to transcription start sites.
#'
#' @param genomic_regions A GRanges object. 
#' @param tss_gr A GRanges object with transcription start sites. Each range should have width 1. 
#' Upstream and downstream are relative to strand of tss_gr.
#' @return A GRanges object where all regions have "relative" as the sequence names and 
#' ranges are the locations of genomic_regions relative to the TSS.  
#' @export
#' @examples
#' # Create GRanges with genomic regions and transcription start sites
#' genomic_regions <- GenomicRanges::GRanges(c("chr1:100-1000:+", "chr1:2000-3000:-"))
#' tss_gr <- GenomicRanges::GRanges(c("chr1:1500:+", "chr1:4000:-"))
#' 
#' # Find the positions of the genomic regions relative to the TSSs
#' methodical::rangesRelativeToTSS(genomic_regions, tss_gr)
rangesRelativeToTSS <- function(genomic_regions, tss_gr){
  
  # Both inputs must be GRanges objects
  stopifnot(is(genomic_regions, "GRanges"), is(tss_gr, "GRanges"))
  
  # A TSS is a single position: collapse wider ranges to their most upstream base (with a warning)
  if(any(width(tss_gr) != 1)){
    warning("All regions in tss_gr should have a width of 1. Shortening each region so that it consists of only the most upstream position")
    tss_gr <- GenomicRanges::resize(tss_gr, 1, fix = "start")
  }

  # Stranded distance from one edge ("start" or "end") of each genomic region to the TSS
  edge_distance <- function(edge){
    methodical::strandedDistance(query_gr = resize(genomic_regions, 1, fix = edge), subject_gr = tss_gr)
  }
  start_distance <- edge_distance("start")
  end_distance <- edge_distance("end")
  
  # The smaller of the two distances becomes the start and the larger the end of 
  # each relative range, which is placed on a pseudo-sequence called "relative"
  relative_gr <- GenomicRanges::GRanges(
    seqnames = "relative", 
    ranges = IRanges::IRanges(start = pmin(start_distance, end_distance), end = pmax(start_distance, end_distance))
  )
  
  # Carry the metadata columns of genomic_regions over to the result
  mcols(relative_gr) <- mcols(genomic_regions)
  
  return(relative_gr)
  
}

#' Create a scatter plot with smoothed curve for values along adjacent loci in a genomic region
#'
#' @param genomic_region_values A data.frame with values associated with genomic regions. 
#' Row names must be the coordinates of genomic regions in character format (e.g chr1:1000-2000) and 
#' all regions must be located on the same sequence. The position of the first base in each region is used as the x-axis coordinate for the plot. 
#' @param sample_name Name of column in genomic_region_values to plot. Defaults to first column if none provided.  
#' @param reference_tss TRUE or FALSE indicating whether to show distances on the X-axis
#' relative to the TSS stored as an attribute `tss_range` of genomic_region_values. 
#' Alternatively, can provide a GRanges object with a single range for such a TSS site. 
#' In either case, will show the distance of genomic regions to the start of this region with genomic regions upstream 
#' relative to the reference_tss shown first. 
#' If FALSE (the default), the x-axis will instead show the start site coordinate of the genomic region. 
#' @param geom_point_params An optional list to explicitly set values of parameters to use with geom_point(). Use list(alpha = 0) to make points invisible. 
#' @param geom_smooth_params An optional list to explicitly set values of parameters to use with geom_smooth(). Use list(alpha = 0) to make line invisible.
#' @param title Title of the plot. Default is no title. 
#' @param xlabel Label for the X axis in the plot. Defaults to "Distance to TSS" if reference_tss is used or
#' "seqname position" where seqname is the name of the relevant sequence.
#' @param ylabel Label for the Y axis in the plot. Default is "Genomic Region Value".
#' @param value_colours A vector with two colours to use, the first for low values and the second for high values. Defaults are c("#53868B", "#CD2626").
#' @param reverse_x_axis TRUE or FALSE indicating whether x-axis should be reversed, 
#' for example if plotting a region on the reverse strand so that left side of plot corresponds to upstream.
#' @return A ggplot object 
#' @examples 
#' # Load methylation-values around the TUBB6 TSS
#' data("tubb6_meth_rse", package = "methodical")
#' tubb6_meth_rse <- eval(tubb6_meth_rse)
#' 
#' # Extract methylation values from tubb6_meth_rse
#' tubb6_methylation_values = methodical::extractGRangesMethSiteValues(meth_rse = tubb6_meth_rse)
#' 
#' # Plot methylation values around TUBB6 TSS
#' methodical::plotRegionValues(tubb6_methylation_values, sample_name = "N1", ylabel = "Methylation Value")
#' 
#' # Create same plot but showing the distance to the TUBB6 TSS on the x-axis
#' data("tubb6_tss", package = "methodical")
#' methodical::plotRegionValues(tubb6_methylation_values, sample_name = "N1",
#'   reference_tss = tubb6_tss, ylabel = "Methylation Value")
#' 
#' @export
plotRegionValues <- function(genomic_region_values, sample_name = NULL, reference_tss = FALSE, geom_point_params = list(), geom_smooth_params = list(), 
  title = NULL, xlabel = NULL, ylabel = "Genomic Region Value", value_colours = c("#53868B", "#CD2626"), reverse_x_axis = FALSE){
  
  # Check that inputs have the correct data type
  stopifnot(is(genomic_region_values, "data.frame"), is(sample_name, "character") | is.null(sample_name),
    S4Vectors::isTRUEorFALSE(reference_tss) | is(reference_tss, "GRanges"), 
    is(geom_point_params, "list"), is(geom_smooth_params, "list"),
    is(title, "character") | is.null(title) | is(title, "expression"), 
    is(xlabel, "character") | is.null(xlabel) | is(xlabel, "expression"),
    is(ylabel, "character") | is.null(ylabel) | is(ylabel, "expression"),
    is(value_colours, "character"),
    S4Vectors::isTRUEorFALSE(reverse_x_axis))
  
  # Check that value_colours is a vector of length two with valid colour names
  if(length(value_colours) != 2){
    stop("value_colours should be a vector of length two")
  }
  # col2rgb() throws an error if either colour is not valid
  invisible(col2rgb(value_colours))
  low_colour <- value_colours[1]
  high_colour <- value_colours[2]
  
  # Convert row.names to genomic coordinates once and reuse the result for all later steps
  region_gr <- tryCatch(GenomicRanges::GRanges(row.names(genomic_region_values)),
    error = function(e) stop("row.names(genomic_region_values) do not seem to be genomic coordinates coercible to GRanges"))
  
  # If reference_tss is TRUE, try to extract tss_range from genomic_region_values. 
  # attr() with exact = TRUE is used to avoid partial matching of attribute names.
  if(is(reference_tss, "logical")){
    if(reference_tss){
      reference_tss <- attr(genomic_region_values, "tss_range", exact = TRUE)
      if(is.null(reference_tss)){
        stop("reference_tss was set to TRUE, but genomic_region_values does not have an attribute called tss_range")
      }
    } else {
      reference_tss <- NULL
    }
  }
  
  # Check that reference_tss is a GRanges with exactly one range if provided (an empty GRanges is also rejected)
  if(!is.null(reference_tss) && (!is(reference_tss, "GRanges") || length(reference_tss) != 1)){
    stop("GRanges indicated by reference_tss should have length of 1")
  }
  
  # Check that all genomic regions are on the same sequence
  if(length(unique(seqnames(region_gr))) > 1){
    stop("All genomic regions must be located on the same sequence")
  }
  
  # Default to the first column if sample_name is not provided and otherwise check that it has length 1
  if(is.null(sample_name)){
    sample_name <- names(genomic_region_values)[1]
  } else if(length(sample_name) != 1){
    stop("sample_name should just be a character of length 1 if provided")
  }
  
  # Check that sample_name is in the names of genomic_region_values and that it is numeric
  if(!sample_name %in% names(genomic_region_values)){
    stop(paste(sample_name, "not the name of a column in genomic_region_values"))
  } else if(!is(genomic_region_values[[sample_name]], "numeric")){
    stop("genomic_region_values[[\"sample_name\"]] should be numeric")
  }
  
  # Create a data.frame with the selected column
  plot_df <- dplyr::select(genomic_region_values, values = !!sample_name)
  
  # Add the start position of each genomic region to plot_df
  plot_df$meth_site_start <- GenomicRanges::start(region_gr)
  
  # Decide x-axis values for genomic regions depending on whether reference_tss provided
  if(!is.null(reference_tss)){
    plot_df$meth_site_plot_position <- methodical::strandedDistance(query_gr = region_gr, subject_gr = reference_tss)
  } else {
    plot_df$meth_site_plot_position <- plot_df$meth_site_start 
  }
  
  # Subset plot_df for complete rows
  plot_df <- plot_df[complete.cases(plot_df), ]
  
  # Create xlabel for plot if not provided
  if(is.null(xlabel)){
    if(!is.null(reference_tss)){
      xlabel <- "Distance to TSS"
    } else {
      xlabel <- paste(seqnames(region_gr)[1], "Position")
    }
  }
  
  # Define default parameter values for geom_point() and geom_smooth() and update with values input by the user. 
  # The fill mapping is always applied last so that points are coloured by their value. 
  geom_point_param_defaults <- list(shape = 21, colour = "black", size = 4, alpha = 1)
  geom_point_params <- modifyList(geom_point_param_defaults, geom_point_params)
  geom_point_params <- modifyList(geom_point_params, list(mapping = aes(fill = values)))
  geom_smooth_param_defaults <- list(color = "black", alpha = 0.75, se = FALSE, span = 20/nrow(plot_df))
  geom_smooth_params <- modifyList(geom_smooth_param_defaults, geom_smooth_params)
  
  # Choose the x-axis scale up front. Adding scale_x_reverse() on top of an existing 
  # scale_x_continuous() would make ggplot2 emit a "Scale for x is already present" message. 
  x_axis_scale <- if(reverse_x_axis){scale_x_reverse} else {scale_x_continuous}
  
  # Create a scatter plot of the values with a smoothed curve
  meth_site_plot <- ggplot(data = plot_df, mapping = aes(x = meth_site_plot_position, y = values)) +
    do.call(geom_point, geom_point_params) +
    do.call(geom_smooth, geom_smooth_params) +
    theme_bw() +
    theme(plot.title = element_text(hjust = 0.5, size = 24), legend.text = element_text(size = 12),
      axis.title = element_text(size = 20), 
      axis.text = element_text(size = 18), legend.position = "None") +
    x_axis_scale(expand = c(0.005, 0.005), labels = scales::comma) +
    scale_y_continuous(expand = expansion(mult = c(0.05, 0.05))) + 
    scale_fill_gradient2(low = low_colour, high = high_colour, mid = "white", midpoint = 0) +
    labs(x = xlabel, y = ylabel, title = title, color = NULL)
  
  # Add reference_tss as an attribute to plot if it was provided
  if(!is.null(reference_tss)){
    attr(meth_site_plot, "tss_range") <- reference_tss
  }
  
  return(meth_site_plot)
}

#' Plot the correlation coefficients for methylation sites within a region and an associated feature of interest
#'
#' @param meth_site_cor_values A data.frame with correlation values associated with methylation sites, such as
#' returned by `calculateMethSiteTranscriptCors`. There should be one column called `meth_site` giving the 
#' coordinates of methylation sites in character format and another column called `cor` giving the correlation 
#' between the methylation values of the methylation sites and a feature of interest. All methylation sites must be 
#' located on the same sequence. 
#' @param reference_tss TRUE or FALSE indicating whether to show distances on the X-axis
#' relative to the TSS stored as an attribute `tss_range` of meth_site_cor_values. 
#' Alternatively, can provide a GRanges object with a single range for such a TSS site. 
#' In either case, will show the distance of methylation sites to the start of this region with methylation sites upstream 
#' relative to the reference_tss shown first. 
#' If FALSE (the default), the x-axis will instead show the start site coordinate of the methylation site. 
#' @param title Title of the plot. Default is no title. 
#' @param xlabel Label for the X axis in the plot. Defaults to "Distance to TSS" if reference_tss is used or
#' "seqname position" where seqname is the name of the relevant sequence.
#' @param ylabel Label for the Y axis in the plot. Default is "Correlation Coefficient".
#' @param value_colours A vector with two colours to use, the first for low values and the second for high values. Defaults are c("#7B5C90", "#bfab25"). 
#' @param reverse_x_axis TRUE or FALSE indicating whether x-axis should be reversed, 
#' for example if plotting a region on the reverse strand so that left side of plot corresponds to upstream.
#' @return A ggplot object 
#' @examples 
#' # Load methylation-transcript correlation results for TUBB6 gene
#' data("tubb6_cpg_meth_transcript_cors", package = "methodical")
#' 
#' # Plot methylation-transcript correlation values around TUBB6 TSS
#' methodical::plotMethSiteCorCoefs(tubb6_cpg_meth_transcript_cors, ylabel = "Spearman Correlation")
#' 
#' # Create same plot but showing the distance to the TUBB6 TSS on the x-axis
#' methodical::plotMethSiteCorCoefs(tubb6_cpg_meth_transcript_cors, 
#'   ylabel = "Spearman Correlation", reference_tss = attributes(tubb6_cpg_meth_transcript_cors)$tss_range)
#' 
#' @export
plotMethSiteCorCoefs <- function(meth_site_cor_values, reference_tss = FALSE, 
  title = NULL, xlabel = NULL, ylabel = "Correlation Coefficient", value_colours = c("#7B5C90", "#bfab25"), reverse_x_axis = FALSE){
  
  # Check that inputs have the correct data type. 
  # Labels may be character, NULL or expression, the same as for plotRegionValues(). 
  stopifnot(is(meth_site_cor_values, "data.frame"),
    S4Vectors::isTRUEorFALSE(reference_tss) | is(reference_tss, "GRanges"), 
    is(title, "character") | is.null(title) | is(title, "expression"), 
    is(xlabel, "character") | is.null(xlabel) | is(xlabel, "expression"),
    is(ylabel, "character") | is.null(ylabel) | is(ylabel, "expression"), 
    is(value_colours, "character"),
    S4Vectors::isTRUEorFALSE(reverse_x_axis))
  
  # Check that value_colours is a vector of length two with valid colour names
  if(length(value_colours) != 2){
    stop("value_colours should be a vector of length two")
  }
  # col2rgb() throws an error if either colour is not valid
  invisible(col2rgb(value_colours))
  low_colour <- value_colours[1]
  high_colour <- value_colours[2]
  
  # Check that meth_site_cor_values has a column called meth_site that can be converted to GRanges and 
  # a numeric column called cor with values between -1 and 1
  if(!all(c("meth_site", "cor") %in% names(meth_site_cor_values))){
    stop("meth_site_cor_values does not have columns called both meth_site and cor")
  } 
  meth_site_gr <- tryCatch(GenomicRanges::GRanges(meth_site_cor_values[["meth_site"]]),
    error = function(e) stop("meth_site_cor_values[[\"meth_site\"]] does not seem to be genomic coordinates coercible to GRanges"))
  
  # `||` short-circuits so that abs() is never called on a non-numeric column, and 
  # any(..., na.rm = TRUE) does not warn when all correlation values are NA
  cor_values <- meth_site_cor_values[["cor"]]
  if(!is(cor_values, "numeric") || any(abs(cor_values) > 1, na.rm = TRUE)){
    stop("meth_site_cor_values[[\"cor\"]] should be a numeric value with a maximum absolute value of 1")
  }
  
  # Read any stored TSS from the input before it is converted below. 
  # attr() with exact = TRUE avoids partial matching of attribute names. 
  stored_tss <- attr(meth_site_cor_values, "tss_range", exact = TRUE)
  
  # Change meth_site column to row names
  meth_site_cor_values <- tibble::column_to_rownames(meth_site_cor_values, "meth_site")
  
  # If reference_tss is TRUE, use the tss_range stored with meth_site_cor_values
  if(is(reference_tss, "logical")){
    if(reference_tss){
      reference_tss <- stored_tss
      if(is.null(reference_tss)){
        stop("reference_tss was set to TRUE, but meth_site_cor_values does not have an attribute called tss_range")
      }
    } else {
      reference_tss <- NULL
    }
  }
  
  # Check that reference_tss is a GRanges with exactly one range if provided (an empty GRanges is also rejected)
  if(!is.null(reference_tss) && (!is(reference_tss, "GRanges") || length(reference_tss) != 1)){
    stop("GRanges indicated by reference_tss should have length of 1")
  }
  
  # Check that all methylation sites are on the same sequence
  if(length(unique(seqnames(meth_site_gr))) > 1){
    stop("All methylation sites must be located on the same sequence")
  }
  
  # Create a data.frame with the correlation values
  plot_df <- dplyr::select(meth_site_cor_values, values = cor)
  
  # Add meth_site_start position to plot_df
  plot_df$meth_site_start <- GenomicRanges::start(meth_site_gr)
  
  # Decide x-axis values for methylation sites depending on whether reference_tss provided
  if(!is.null(reference_tss)){
    plot_df$meth_cor_plot_position <- methodical::strandedDistance(query_gr = meth_site_gr, subject_gr = reference_tss)
  } else {
    plot_df$meth_cor_plot_position <- plot_df$meth_site_start 
  }
  
  # Subset plot_df for complete rows
  plot_df <- plot_df[complete.cases(plot_df), ]
  
  # Create xlabel for plot if not provided
  if(is.null(xlabel)){
    if(!is.null(reference_tss)){
      xlabel <- "Distance to TSS"
    } else {
      xlabel <- paste(seqnames(meth_site_gr)[1], "Position")
    }
  }
  
  # Choose the x-axis scale up front. Adding scale_x_reverse() on top of an existing 
  # scale_x_continuous() would make ggplot2 emit a "Scale for x is already present" message. 
  x_axis_scale <- if(reverse_x_axis){scale_x_reverse} else {scale_x_continuous}
  
  # Create a line and scatter plot of the correlation values
  meth_cor_plot <- ggplot(data = plot_df, mapping = aes(x = meth_cor_plot_position, y = values)) +
    geom_line(color = "black", alpha = 0.75) +
    geom_point(shape = 21, colour = "black", size = 4, alpha = 1, aes(fill = values)) +
    theme_bw() +
    theme(plot.title = element_text(hjust = 0.5, size = 24), legend.text = element_text(size = 12),
      axis.title = element_text(size = 20), 
      axis.text = element_text(size = 18), legend.position = "None") +
    x_axis_scale(expand = c(0.005, 0.005), labels = scales::comma) +
    scale_y_continuous(expand = expansion(mult = c(0.05, 0.05))) + 
    scale_fill_gradient2(low = low_colour, high = high_colour, mid = "white", midpoint = 0) +
    labs(x = xlabel, y = ylabel, title = title, color = NULL)
  
  # Add reference_tss as an attribute to plot if it was provided
  if(!is.null(reference_tss)){
    attr(meth_cor_plot, "tss_range") <- reference_tss
  }
  
  return(meth_cor_plot)
}

#' Add TMRs to a methylation site value plot
#'
#' @param meth_site_plot A plot of Value around a TSS.
#' @param tmrs_gr A GRanges object giving the position of TMRs.
#' @param reference_tss An optional GRanges object with a single range. If provided, the x-axis will 
#' show the distance of methylation sites to the start of this region with methylation sites upstream
#' relative to the reference_tss shown first. If not, the x-axis will show the start site coordinate of the methylation site.
#' @param transcript_id An optional transcript ID. If provided, will attempt to filter tmrs_gr and reference_tss using a metadata column called transcript_id with 
#' a value identical to the provided one. 
#' @param tmr_colours A vector with colours to use for negative and positive TMRs. Defaults to "#A28CB1" for negative and "#D2C465" for positive TMRs. 
#' @param linewidth A numeric value to be provided as linewidth for geom_segment(). 
#' @return A ggplot object
#' @export
#' @examples 
#' # Load methylation-transcript correlation results for TUBB6 gene
#' data("tubb6_cpg_meth_transcript_cors", package = "methodical")
#' 
#' # Plot methylation-transcript correlation values around TUBB6 TSS
#' tubb6_correlation_plot <- methodical::plotMethSiteCorCoefs(tubb6_cpg_meth_transcript_cors, ylabel = "Spearman Correlation")
#'   
#' # Find TMRs for TUBB6
#' tubb6_tmrs <- findTMRs(correlation_list = list(ENST00000591909 = tubb6_cpg_meth_transcript_cors))
#' 
#' # Plot TMRs on top of tubb6_correlation_plot
#' methodical::plotTMRs(tubb6_correlation_plot, tmrs_gr = tubb6_tmrs)
plotTMRs <- function(meth_site_plot, tmrs_gr, reference_tss = NULL, transcript_id = NULL, tmr_colours = c("#A28CB1", "#D2C465"), linewidth = 5){
  
  # Check that inputs have the correct data type
  stopifnot(is(meth_site_plot, "ggplot"), is(tmrs_gr, "GRanges"),
    is(reference_tss, "GRanges") | is.null(reference_tss), 
    is(transcript_id, "character") | is.null(transcript_id),
    is(tmr_colours, "character"), is(linewidth, "numeric"))
  
  # One colour is needed for each of the two TMR directions. Only the first two colours are used. 
  if(length(tmr_colours) < 2){
    stop("tmr_colours should be a vector with two colours, the first for negative and the second for positive TMRs")
  }
  tmr_colours <- tmr_colours[seq_len(2)]
    
  # Filter tmrs_gr and reference_tss for transcript_id if provided. 
  # %in% is used rather than == so that missing transcript IDs give FALSE instead of an NA subscript. 
  if(!is.null(transcript_id)){
    tmrs_gr <- tmrs_gr[tmrs_gr$transcript_id %in% transcript_id]
    if(!is.null(reference_tss)){
      reference_tss <- reference_tss[reference_tss$transcript_id %in% transcript_id]
    }
  }
  
  # Check that if reference_tss is provided, it has a length of exactly 1. 
  # This also catches a reference_tss left empty by the transcript_id filter. 
  if(!is.null(reference_tss) && length(reference_tss) != 1){
    stop("reference_tss should have length of 1 if provided")
  }
  
  # Decide positions for TMRs depending on whether reference_tss provided
  if(!is.null(reference_tss)){
    tmrs_df <- data.frame(methodical::rangesRelativeToTSS(
      genomic_regions = tmrs_gr, tss_gr = reference_tss))
  } else {
    tmrs_df <- data.frame(tmrs_gr)
  }
  
  # Make direction a factor with both Negative and Positive so that both levels 
  # are always present in the colour scale
  tmrs_df$direction <- factor(tmrs_df$direction, levels = c("Negative", "Positive"))
  
  # Add TMRs to meth_site_plot as horizontal segments at y = 0
  meth_site_plot_with_tmrs <- meth_site_plot +
    geom_segment(data = tmrs_df, aes(x = start, xend = end, y = 0, yend = 0, color = direction), 
      linewidth = linewidth) + 
    scale_color_manual(values = setNames(tmr_colours, levels(tmrs_df$direction)), drop = FALSE) + 
    labs(color = "TMR Direction")
  
  # Return meth_site_plot_with_tmrs
  return(meth_site_plot_with_tmrs)
  
}

#' Create plot of Methodical score values for methylation sites around a TSS
#'
#' @param genomic_region_values A data.frame with correlation values for methylation sites. There should be a column called "meth_site" 
#' giving the coordinates of the methylation sites (it is converted to row names internally), one column called "cor" and another called "p_val", which are used to calculate the Methodical score. All methylation sites must be located on the same sequence. 
#' @param reference_tss An optional GRanges object with a single range. If provided, the x-axis will show the distance of methylation sites to the start of this region with methylation sites upstream
#' relative to the reference_tss shown first. If not, the x-axis will show the start site coordinate of the methylation site. 
#' @param p_value_threshold The p-value threshold used to identify TMRs. Default value is 0.005. Set to NULL to turn off significance thresholds.
#' @param smooth_scores TRUE or FALSE indicating whether to display a curve of smoothed Methodical scores on top of the plot. Default is TRUE.
#' @param offset_length Offset length to be supplied to calculateSmoothedMethodicalScores. Default is 10. 
#' @param smoothing_factor Smoothing factor to be provided to calculateSmoothedMethodicalScores. Default is 0.75. 
#' @param smoothed_curve_colour Colour of the smoothed curve. Default is "black".
#' @param linewidth Line width of the smoothed curve. Default value is 1.
#' @param curve_alpha Alpha value for the curve. Default value is 0.75. 
#' @param title Title of the plot. Default is no title. 
#' @param xlabel Label for the X axis in the plot. Default is "Genomic Position".
#' @param low_colour Colour to use for low values. Default value is "#7B5C90".
#' @param high_colour Colour to use for high values. Default value is "#BFAB25".
#' @return A ggplot object 
#' @export
#' @examples 
#' # Load methylation-transcript correlation results for TUBB6 gene
#' data("tubb6_cpg_meth_transcript_cors", package = "methodical")
#'   
#' # Calculate and plot Methodical scores from correlation values
#' methodical::plotMethodicalScores(tubb6_cpg_meth_transcript_cors, reference_tss = attributes(tubb6_cpg_meth_transcript_cors)$tss_range)
plotMethodicalScores <- function(genomic_region_values, reference_tss = NULL, p_value_threshold = 0.005,
  smooth_scores = TRUE, offset_length = 10, smoothing_factor = 0.75, 
  smoothed_curve_colour = "black", linewidth = 1, curve_alpha = 0.75, 
  title = NULL, xlabel = "Genomic Position", low_colour = "#7B5C90", high_colour = "#BFAB25"){
  
  # Check that inputs have the correct data type
  stopifnot(is(genomic_region_values, "data.frame"), 
    is(reference_tss, "GRanges") | is.null(reference_tss), 
    is(p_value_threshold, "numeric") | is.null(p_value_threshold), S4Vectors::isTRUEorFALSE(smooth_scores),
    is(offset_length, "numeric"), is(smoothing_factor, "numeric"),
    is(smoothed_curve_colour, "character"), is(linewidth, "numeric"),
    is(curve_alpha, "numeric"), is(title, "character") | is.null(title),
    is(xlabel, "character") | is.null(xlabel), is(low_colour, "character"), 
    is(high_colour, "character"))
  
  # Check that the columns used below are present and that cor and p_val are numeric
  missing_columns <- setdiff(c("meth_site", "cor", "p_val"), names(genomic_region_values))
  if(length(missing_columns) > 0){
    stop(paste("genomic_region_values is missing the following required columns:", 
      paste(missing_columns, collapse = ", ")))
  }
  if(!is.numeric(genomic_region_values[["cor"]]) || !is.numeric(genomic_region_values[["p_val"]])){
    stop("The cor and p_val columns of genomic_region_values should both be numeric")
  }
  
  # Change meth_site column to row names
  genomic_region_values_plot_df <- tibble::column_to_rownames(genomic_region_values, "meth_site")
  
  # Check that if reference_tss is provided, it has a length of exactly 1 (an empty GRanges is also rejected)
  if(!is.null(reference_tss) && length(reference_tss) != 1){
    stop("reference_tss should have length of 1 if provided")
  }
  
  # Convert the methylation site names to GRanges once and reuse the result
  meth_site_gr <- GenomicRanges::GRanges(row.names(genomic_region_values_plot_df))
  
  # Check that all methylation sites are on the same sequence
  if(length(seqlevels(meth_site_gr)) > 1){
    stop("All methylation sites must be located on the same sequence")
  }
  
  # Add meth_site_start position to genomic_region_values_plot_df
  genomic_region_values_plot_df$meth_site_start <- GenomicRanges::start(meth_site_gr)
  
  # Decide x-axis values for methylation sites depending on whether reference_tss provided
  if(!is.null(reference_tss)){
    genomic_region_values_plot_df$meth_site_plot_position <- methodical::strandedDistance(query_gr = meth_site_gr, subject_gr = reference_tss)
  } else {
    genomic_region_values_plot_df$meth_site_plot_position <- genomic_region_values_plot_df$meth_site_start 
  }
  
  # Convert p-values into methodical score: -log10(p-value) signed by the direction of the correlation
  genomic_region_values_plot_df$methodical_score <- log10(genomic_region_values_plot_df$p_val) * -sign(genomic_region_values_plot_df$cor)
  
  # Subset genomic_region_values_plot_df for necessary columns
  genomic_region_values_plot_df <- dplyr::select(genomic_region_values_plot_df, meth_site_start, meth_site_plot_position, methodical_score, cor)
  
  # Create a line and scatter plot of the Methodical scores with points coloured by correlation
  meth_site_plot <- ggplot(data = genomic_region_values_plot_df, mapping = aes(x = meth_site_plot_position, y = methodical_score)) +
    geom_line(color = "black", alpha = 0.75) +
    geom_point(shape = 21, colour = "black", size = 4, alpha = 1, aes(fill = cor)) +
    theme_bw() +
    theme(plot.title = element_text(hjust = 0.5, size = 24), legend.text = element_text(size = 12),
      axis.title = element_text(size = 20), axis.text = element_text(size = 18), legend.position = "None") +
    scale_x_continuous(expand = c(0.005, 0.005), labels = scales::comma) +
    scale_y_continuous(expand = expansion(mult = c(0.05, 0.05))) + 
    scale_fill_gradient2(low = low_colour, high = high_colour, mid = "white", midpoint = 0) +
    labs(x = xlabel, y = "Methodical Score", title = title, color = NULL) 
  
  # Add TMR thresholds if specified: one dashed line for negative and one for positive scores
  if(!is.null(p_value_threshold)){
    meth_site_plot <- meth_site_plot + 
      geom_hline(yintercept = log10(p_value_threshold), linetype = "dashed", colour = low_colour) +
      geom_hline(yintercept = -log10(p_value_threshold), linetype = "dashed", colour = high_colour) 
  }
  
  # Add smoothed Methodical scores if specified. 
  # smoothed_methodical_scores is not a column of the plot data; aes() finds it in 
  # this function's environment when the plot is built. 
  if(smooth_scores){
    smoothed_methodical_scores <- calculateSmoothedMethodicalScores(correlation_df = genomic_region_values, 
      offset_length = offset_length, smoothing_factor = smoothing_factor)
    meth_site_plot <- meth_site_plot +
      geom_line(mapping = aes(y = smoothed_methodical_scores), 
        color = smoothed_curve_colour, alpha = curve_alpha, linewidth = linewidth)
  }
  
  # Add reference_tss as an attribute to plot if it was provided, the same as 
  # plotRegionValues() and plotMethSiteCorCoefs() do, so that annotatePlot(reference_tss = TRUE) can find it
  if(!is.null(reference_tss)){
    attr(meth_site_plot, "tss_range") <- reference_tss
  }
  
  return(meth_site_plot)
}

#' Create a plot with genomic annotation for a plot of values at methylation sites. 
#' 
#' Works with plots returned by `plotRegionValues()`, `plotMethSiteCorCoefs()` or `plotMethodicalScores`.
#' Can combine the meth site values plot and genomic annotation together into a 
#' single plot or return the annotation plot separately. 
#'
#' @param meth_site_plot A plot of methylation site values (generally methylation level or correlation of methylation with transcription) around a TSS
#' @param annotation_grl A GRangesList object (or list coercible to a GRangesList) where each component GRanges gives 
#' the locations of different classes of regions to display. Each class of region will 
#' be given a separate colour in the plot, with regions ordered by the order of `names(annotation_grl)`. 
#' @param grl_colours An optional vector of colours used to display each of the 
#' GRanges making up annotation_grl. Must have same length as annotation_grl. 
#' @param reference_tss TRUE or FALSE indicating whether to show distances on the X-axis
#' relative to the TSS stored as an attribute `tss_range` of meth_site_plot. 
#' Alternatively, can provide a GRanges object with a single range for such a TSS site. 
#' In either case, will show the distance of methylation sites to the start of this region with methylation sites upstream 
#' relative to the reference_tss shown first. 
#' If FALSE (the default), the x-axis will instead show the start site coordinate of the methylation site. 
#' @param ylab The title to give the Y axis in the annotation plot. Default is "Genome Annotation".
#' @param annotation_line_size Linewidth for annotation plot. Default is 5. 
#' @param annotation_plot_proportion A value giving the proportion of the height of the plot devoted to the annotation. Default is 0.5. 
#' @param keep_meth_site_plot_legend TRUE or FALSE indicating whether to retain the legend of meth_site_plot, if it has one. Default value is FALSE. 
#' @param annotation_plot_only TRUE or FALSE indicating whether to return only the annotation plot. Default is to combine meth_site_plot with the annotation. 
#' @return A ggplot object
#' @export
#' @examples 
#' # Get CpG islands from UCSC
#' data("hg38_cpg_islands", package = "methodical")
#' hg38_cpg_islands <- GRangesList(split(hg38_cpg_islands, hg38_cpg_islands$type))
#'
#' # Load plot with CpG methylation correlation values for TUBB6
#' data("tubb6_correlation_plot", package = "methodical")
#' 
#' # Add positions of CpG islands to tubb6_correlation_plot
#' methodical::annotatePlot(tubb6_correlation_plot, annotation_grl = hg38_cpg_islands, annotation_plot_proportion = 0.3)
#' 
annotatePlot <- function(meth_site_plot, annotation_grl, reference_tss = FALSE, grl_colours = NULL, 
  annotation_line_size = 5, ylab = "Genome Annotation", annotation_plot_proportion = 0.5, keep_meth_site_plot_legend = FALSE, annotation_plot_only = FALSE){
  
  # Overview: builds a linerange plot showing where the regions in annotation_grl 
  # fall within the genomic window covered by meth_site_plot and either returns 
  # that plot on its own (annotation_plot_only = TRUE) or stacks it underneath 
  # meth_site_plot using cowplot::plot_grid().
  #
  # meth_site_plot: ggplot whose data has genomic locations as row names
  # annotation_grl: GRangesList (or list coercible to one); one row is drawn per element
  # reference_tss: TRUE to use the tss_range attribute of meth_site_plot, or a 
  #   GRanges of length 1; positions are then plotted relative to it
  # grl_colours: one colour per element of annotation_grl, in the same order
  # annotation_plot_proportion: share (0-1) of the combined height given to the annotation plot
  
  # If annotation_grl is a list, attempt to coerce it to a GRangesList
  if(is(annotation_grl, "list")){
    annotation_grl <- tryCatch(GRangesList(annotation_grl), 
      error = function(e) stop("annotation_grl is a list but cannot be coerced to a GRangesList"))
  }
  
  # Check that inputs have the correct data type. 
  # ylab is forwarded to labs(), so NULL and plotmath expressions are accepted as well as strings
  stopifnot(is(meth_site_plot, "ggplot"), is(annotation_grl, "GRangesList"),
    S4Vectors::isTRUEorFALSE(reference_tss) | is(reference_tss, "GRanges"), 
    is(grl_colours, "character") | is.null(grl_colours),
    is(annotation_line_size, "numeric"), 
    is(ylab, "character") | is.null(ylab) | is.language(ylab),
    is(annotation_plot_proportion, "numeric"), 
    length(annotation_plot_proportion) == 1, !is.na(annotation_plot_proportion),
    S4Vectors::isTRUEorFALSE(keep_meth_site_plot_legend),
    S4Vectors::isTRUEorFALSE(annotation_plot_only))
  
  # Check that annotation_plot_proportion is between 0 and 1
  if(annotation_plot_proportion < 0 || annotation_plot_proportion > 1){
    stop("annotation_plot_proportion should be between 0 and 1")
  }
  
  # Create colours for region classes if grl_colours not provided and 
  # check that it has the same length as annotation_grl if it is
  if(is.null(grl_colours)){
    palette <- c("#9E0142", "#D53E4F", "#F46D43", "#FDAE61", "#FEE08B", "#FFFFBF", 
      "#E6F598", "#ABDDA4", "#66C2A5", "#3288BD", "#5E4FA2")
    grl_colours <- grDevices::colorRampPalette(palette)(length(annotation_grl))
  } else if(length(annotation_grl) != length(grl_colours)){
    stop("grl_colours must have the same length as annotation_grl")
  }
    
  # If reference_tss is TRUE, try to extract tss_range from meth_site_plot
  if(is(reference_tss, "logical")){
    if(reference_tss){
      reference_tss <- attributes(meth_site_plot)$tss_range 
      if(is.null(reference_tss)){
        stop("reference_tss was set to TRUE, but meth_site_plot does not have an attribute called tss_range")
      }
    } else {
      reference_tss <- NULL
    }
  }
  
  # Check that reference_tss is a GRanges with a length of exactly 1 if provided.
  # An empty GRanges is rejected too since distances cannot be measured from it
  if(!is.null(reference_tss) && (!is(reference_tss, "GRanges") || length(reference_tss) != 1)){
    stop("GRanges indicated by reference_tss should have length of 1")
  }
  
  # Gives GRanges generic names if names are missing
  if(is.null(names(annotation_grl))){
    message("names(annotation_grl) is NULL. Setting to genomic_regions_1, genomic_regions_2, etc.")
    names(annotation_grl) <- paste0("genomic_regions_", seq_along(annotation_grl))
  }
  
  # Tie each colour to the name of its region class. The factor levels of region_type 
  # are reversed below and classes without any ranges inside the plot are dropped, 
  # so matching colours by position would give classes the wrong colours. 
  # Names supplied by the user are left untouched.
  if(is.null(names(grl_colours))){
    names(grl_colours) <- names(annotation_grl)
  }
  
  # Flatten annotation_grl and record which element each range came from
  annotation_grl <- unlist(GRangesList(lapply(annotation_grl, unname)))
  annotation_grl$region_type <- factor(names(annotation_grl), unique(names(annotation_grl)))
  
  # Parse the methylation site locations once from the row names of the plot data
  meth_site_names <- row.names(meth_site_plot$data)
  meth_site_gr <- GRanges(meth_site_names)
  meth_site_starts <- start(meth_site_gr)
  
  # Get most extreme methylation sites in plot
  meth_site_min <- meth_site_names[which.min(meth_site_starts)]
  meth_site_max <- meth_site_names[which.max(meth_site_starts)]
  
  # Create a GRanges object which covers the plot by merging the two extreme sites 
  # regardless of how far apart they are
  plot_region <- reduce(GRanges(c(meth_site_min, meth_site_max)), min.gapwidth = .Machine$integer.max)
  
  # Filter annotation regions for those which overlap plot_region
  annotation_grl <- subsetByOverlaps(annotation_grl, plot_region)
  
  # Update start and end of annotation_grl so that they lie within plot_region
  start(annotation_grl) <- pmax(start(annotation_grl), start(plot_region))
  end(annotation_grl) <- pmin(end(annotation_grl), end(plot_region))
  
  # Decide x-axis values for methylation sites depending on whether reference_tss provided
  if(!is.null(reference_tss)){
    meth_site_plot$data$meth_site_plot_position <- methodical::strandedDistance(query_gr = meth_site_gr, subject_gr = reference_tss)
    annotation_grl <- methodical::rangesRelativeToTSS(genomic_regions = annotation_grl, tss_gr = reference_tss)
  } else {
    meth_site_plot$data$meth_site_plot_position <- meth_site_plot$data$meth_site_start 
  }
  
  # Convert annotation_grl to a data.frame
  annotation_df <- data.frame(annotation_grl)
  
  # Ensure region_type is a factor and reverse their order so that they are displayed from top to bottom in the plot
  annotation_df$region_type <- factor(annotation_df$region_type)
  annotation_df$region_type <- factor(annotation_df$region_type, levels = rev(levels(annotation_df$region_type)))
  
  # Extract axis text size, axis title size and x-axis title from meth_site_plot
  axis_text_size <- meth_site_plot$theme$axis.text$size
  axis_title_size <- meth_site_plot$theme$axis.title$size
  x_axis_title <- meth_site_plot$labels$x
  
  # Check if reverse_x_axis was set when creating meth_site_plot and then 
  # set the limits and scale function for the x-axis accordingly. 
  # get_scales("x") returns NULL if meth_site_plot has no explicit x scale, so 
  # isTRUE() is used to treat that case as a regular, non-reversed axis
  x_scale <- meth_site_plot$scales$get_scales("x")
  x_range <- ggplot_build(meth_site_plot)$layout$panel_params[[1]]$x.range
  if(isTRUE(x_scale$trans$name == "reverse")){
    limits <- -x_range
    scale_x_function <- scale_x_reverse
  } else {
    limits <- x_range
    scale_x_function <- scale_x_continuous
  }
  
  # Create a linerange plot showing the positions of different genomic elements
  annotation_plot <- ggplot(annotation_df, aes(xmin = start, xmax = end, x = NULL, y = region_type,  group = region_type, color = region_type)) + 
    geom_linerange(linewidth = annotation_line_size, position = position_dodge(0.06)) +
    theme_bw() + 
    theme(plot.title = element_text(hjust = 0.5, size = 24),
      axis.title = element_text(size = axis_title_size), 
      axis.text = element_text(size = axis_text_size), legend.position = "None")  +
    labs(x = x_axis_title, y = ylab) +
    scale_x_function(expand = expansion(mult = c(0, 0)), labels = scales::comma, limits = limits) + 
    scale_color_manual(values = grl_colours, guide = guide_legend(override.aes = list(color = "white"))) +
    # The following code makes the legend invisible
    theme(
        legend.text = element_text(color = "white"),
        legend.title = element_text(color = "white")
    )
  
  # Return annotation_plot if annotation_plot_only is TRUE
  if(annotation_plot_only){return(annotation_plot)}
  
  # Combine meth_site_plot and annotation_plot, hiding the x-axis of meth_site_plot 
  # since the annotation plot underneath displays the same axis
  annotated_meth_site_plot <- cowplot::plot_grid(meth_site_plot + theme(legend.position = "none", 
    axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank()), annotation_plot, 
    nrow = 2, align = "v", rel_heights = c(1 - annotation_plot_proportion, annotation_plot_proportion))
  
  # Add legend if specified. The legend is only extracted when it is actually needed 
  # and is padded with an empty panel so that it lines up with meth_site_plot
  if(keep_meth_site_plot_legend){
    meth_site_plot_legend <- suppressWarnings(cowplot::get_legend(meth_site_plot))
    legends <- cowplot::plot_grid(meth_site_plot_legend, NULL, nrow = 2, rel_heights = c(1 - annotation_plot_proportion, annotation_plot_proportion))
    annotated_meth_site_plot <- cowplot::plot_grid(annotated_meth_site_plot, legends, rel_widths = c(1, 0.2))
  }
  
  # Return annotated_meth_site_plot
  return(annotated_meth_site_plot)
}
