Gene finding: Finding Start and Stop codons using R

R code

    library("stringr")
              #' Find potential start and stop codons in a DNA sequence
              #'
              #' Searches `sequence` for the start codon "atg" and the stop codons
              #' "taa", "tag" and "tga". Codons are reported wherever they occur,
              #' at any offset along the given strand; the reverse strand is not
              #' examined. Matching is case-insensitive.
              #'
              #' @param sequence A single, non-NA character string holding the DNA
              #'   sequence.
              #' @return An unnamed list of two parallel vectors, sorted by position
              #'   along the sequence: element 1 holds the 1-based integer start
              #'   position of each codon found, element 2 holds the matching codon
              #'   (in lower case) at each of those positions.
              findPotentialStartsAndStops <- function(sequence) {
                # Exactly one sequence is analysed per call.
                stopifnot(
                  is.character(sequence),
                  length(sequence) == 1L,
                  !is.na(sequence)
                )

                # Define a vector with the sequences of potential start and stop codons.
                codons <- c("atg", "taa", "tag", "tga")

                # The codons are written in lower case, so lower-case the input to
                # make upper-case (FASTA-style) sequences match as well.
                sequence <- tolower(sequence)

                # Find the start positions of all occurrences of each codon in
                # "sequence". gregexpr() reports -1 when there is no match, so only
                # positive positions are kept; fixed = TRUE matches the codon
                # literally rather than as a regular expression.
                hits <- lapply(codons, function(codon) {
                  starts <- gregexpr(codon, sequence, fixed = TRUE)[[1]]
                  as.integer(starts[starts > 0L])
                })

                # Combine the positions of all codons into one vector "positions",
                # and build the parallel vector "types" by repeating each codon once
                # per occurrence of that codon.
                positions <- unlist(hits, use.names = FALSE)
                types <- rep(codons, times = lengths(hits))

                # Sort the vectors "positions" and "types" in order of position along
                # the input sequence.
                indices <- order(positions)

                # Return a list variable including vectors "positions" and "types".
                list(positions[indices], types[indices])
              }
              # Example: run the codon finder on a short test sequence. In this
              # sequence "atg" occurs at positions 4 and 16 and "taa" at position 10;
              # at top level the returned list is printed.
              s1 <- "aaaatgcagtaacccatgccc"
              findPotentialStartsAndStops(s1)