library(BiocManager)
library(Biostrings)

Test values

test_fasta <- "NNGCATGCGATGACTCTTGCTGACCTTTTATTAAGAACTAAATGGACAATATTATGGAGCATTTCATGTATAAATTGGTGCGTAAAATCGTTGGATCTCTCTTCTAAGTACATCCTACTATAACAATCAAGAAAAACAAGAAAATCGGACAAAACAATCAAGTATGGATTCTAGAACAGTTGGTATATTAGGAGGGGGACAATTGGGACGTATGATTGTTGAGTAAGCTAACAGGCTCAACATTAAGACGGTAATACTAGATGCTGAAAATTCTCCTGCCAAACAAATAAGCAACTCCAATGACCACGTTAATGGCTCCTTTTCCAATCCTCTTGATATCGAAAAACTAGCTGAAAAATGTGATGTGCTAACGATTGAGATTGAGCATGTTGATGTTCCTACACTAAAGAATCTTCAAGTAAAACATCCCAAATTAAAAATTTACCCTTCTCCAGAAACAATCAGATTGATACAAGACAAATATATTCAAAAAGAGCATTTAATCAAAAATGGTATAGCAGTTACCCAAAGTGTTCCTGTGGAACAAGCCAGTGAGACGTCCCTATTGAATGTTGGAAGAGATTTGGGTTTTCCATTCGTCTTGAAGTCGAGGACTTTGGCATACGATGGAAGAGGTAACCGCTTTTGTGAAAAGAGAAGAGTCATTTGTCAGCATAGCTGTAATAATCAATCATGACGTAAGAAATGTATCATAATTAAAAGTTGTTAAAGATGTCAGTGTTATGTTGGTGTTACAAAATTCTCGGCTTCTCACTAATATTTAATATCTCTTAAATTTTATCTGTCTTTGATTCTTTTAAGAAAAGTTATGTATTATTCAAGAAAAAGTCAATTCCGCAATCCAAAAGAAGG"

correct_sequence <- "ATGGATTCTAGAACAGTTGGTATATTAGGAGGGGGACAATTGGGACGTATGATTGTTGAGTAAGCTAACAGGCTCAACATTAAGACGGTAATACTAGATGCTGAAAATTCTCC"

# Flanking motifs that delimit the region to keep. Each motif is 14 bases long
# and equals the first / last 14 bases of `correct_sequence`, i.e. both motifs
# are part of the expected trimmed output (the trim is inclusive).
L_boundary <- "ATGGATTCTAGAAC" # left (start) boundary motif
R_boundary <- "GCTGAAAATTCTCC" # right (end) boundary motif

Test basic concept

# Wrap the raw character sequence in a Biostrings::DNAString so it can be
# searched with matchPattern().
test_dna <- DNAString(x = test_fasta)

# Locate each boundary motif in the sequence; each result holds the
# start/end coordinates of the hit.
L_match <- matchPattern(pattern = L_boundary, subject = test_dna)
R_match <- matchPattern(pattern = R_boundary, subject = test_dna)

# Sanity check: the stretch running from the first base of the left motif to
# the last base of the right motif should equal the expected sequence.
subseq(
  test_dna,
  start = start(L_match),
  end = end(R_match)
) == DNAString(x = correct_sequence)
[1] TRUE

Encapsulate

# Trim a DNA sequence down to the region delimited by two boundary motifs.
#
# Args:
#   Subject:    Sequence to trim, given as a string (converted with
#               DNAString()).
#   L_boundary: Motif marking the start of the region to keep; given as a
#               string. The motif itself is included in the result.
#   R_boundary: Motif marking the end of the region to keep; given as a
#               string. The motif itself is included in the result.
#
# Returns:
#   A DNAString spanning from the first base of the left motif to the last
#   base of the right motif.
#
# Errors:
#   Stops with a descriptive message when a boundary is empty, when the left
#   motif is absent, or when the right motif does not occur at or after the
#   left motif.
#
# When a motif occurs more than once, the first occurrence of the left motif
# is used, together with the first occurrence of the right motif that starts
# at or after it. With exactly one hit per motif the result is the same as
# indexing with start(L_match):end(R_match).
TimTimTrimTrim <- function(Subject, L_boundary = "", R_boundary = "") {
  # TRUE when a boundary carries no usable sequence (e.g. the "" default).
  is_blank <- function(motif) {
    motif_chr <- as.character(motif)
    length(motif_chr) == 0L || any(!nzchar(motif_chr))
  }
  if (is_blank(L_boundary) || is_blank(R_boundary)) {
    stop("L_boundary and R_boundary must both be non-empty sequences.",
         call. = FALSE)
  }

  dna_obj <- DNAString(Subject)

  L_match <- matchPattern(L_boundary, dna_obj)
  if (length(L_match) == 0L) {
    stop("L_boundary was not found in Subject.", call. = FALSE)
  }
  # Anchor the region on the earliest occurrence of the left motif.
  region_start <- min(start(L_match))

  R_match <- matchPattern(R_boundary, dna_obj)
  # Ignore right-motif hits that begin before the left motif: using one of
  # those would produce a descending index range and a reversed sequence.
  downstream <- start(R_match) >= region_start
  if (!any(downstream)) {
    stop("R_boundary was not found at or after L_boundary in Subject.",
         call. = FALSE)
  }
  region_end <- min(end(R_match)[downstream])

  subseq(dna_obj, start = region_start, end = region_end)
}

# Regression check for example 1: the function must reproduce the expected
# trimmed region (evaluates to TRUE on success).
TimTimTrimTrim(
  Subject = test_fasta,
  L_boundary = "ATGGATTCTAGAAC",
  R_boundary = "GCTGAAAATTCTCC"
) == DNAString(x = correct_sequence)
[1] TRUE

Test Example 2

test_fasta2 <- "NNGCATGCGATGACTCTTGCTGACCTTTTATTAAGAACTAAATGGACAATATTATGGAGCATTTCATGTATAAATTGGTGCGTAAAATCGTTGGATCTCTCTTCTAAGTACATCCTACTATAACAATCAAGAAAAACAAGAAAATCGGACAAAACAATCAAGTATGGATTCTAGAACAGTTGGTATATTAGGAGGGGGACAATTGGGACGTATGATTGTTGAGTAAGCTAACAGGCTCAACATTAAGACGGTAATACTAGATGCTGAAAATTCTCCTGCCAAACAAATAAGCAACTCCAATGACCACGTTAATGGCTCCTTTTCCAATCCTCTTGATATCGAAAAACTAGCTGAAAAATGTGATGTGCTAACGATTGAGATTGAGCATGTTGATGTTCCTACACTAAAGAATCTTCAAGTAAAACATCCCAAATTAAAAATTTACCCTTCTCCAGAAACAATCAGATTGATACAAGACAAATATATTCAAAAAGAGCATTTAATCAAAAATGGTATAGCAGTTACCCAAAGTGTTCCTGTGGAACAAGCCAGTGAGACGTCCCTATTGAATGTTGGAAGAGATTTGGGTTTTCCATTCGTCTTGAAGTCGAGGACTTTGGCATACGATGGAAGAGGTAACCGCTTTTGTGAAAAGAGAAGAGTCATTTGTCAGCATAGCTGTAATAATCAATCATGACGTAAGAAATGTATCATAATTAAAAGTTGTTAAAGATGTCAGTGTTATGTTGGTGTTACAAAATTCTCGGCTTCTCACTAATATTTAATATCTCTTAAATTTTATCTGTCTTTGATTCTTTTAAGAAAAGTTATGTATTATTCAAGAAAAAGTCAATTCCGCAATCCAAAAGAAGG"
correct_sequence2 <- "ATGGATTCTAGAACAGTTGGTATATTAGGAGGGGGACAATTGGGACGTATGATTGTTGAGTAAGCTAACAGGCTCAACATTAAGACGGTAATACTAGATGCTGAAAATTCTCC"

# Regression check for example 2, reusing the shared boundary motifs
# (evaluates to TRUE on success).
# NOTE(review): test_fasta2 / correct_sequence2 appear to be the same
# sequences as example 1 -- confirm this example adds coverage.
TimTimTrimTrim(
  Subject = test_fasta2,
  L_boundary = L_boundary,
  R_boundary = R_boundary
) == DNAString(x = correct_sequence2)
[1] TRUE

Test Example 3

test_fasta3 <- "NCTATAAAGTTCAAGCTGTACTCGTTATGGAGCATTTCATGTATAAATTGGTGCGTAAAATCGTTGGATCTCTCTTCTAAGTACATCCTACTATAACAATCAAGAAAAACAAGAAAATCGGACAAAACAATCAAGTATGGATTCTAGAACAGTTGGTATATTAGGAGGGGGATAATTGTGACGTATGATTGTTGAGTAAGCAAACAGGCTCAACATTAAGACGGTAATACTAGATGCTGAAAATTCTCCTGCCAAACAAATAAGCAACTCCCATGACCACGTTAATGGCTCCTTTTCCAATCCTCTTGATATCGAAAAACTAACTGAAAAATGTGATGTGCTAACGATTGACATTGAGCATGTTGATGTTCCTACACTAAAGAATCTTCACGTAAAACATCCCAAATTAAAAATTTACCCTTCTCCCCAAACAATCACATTGATACAAGACAAATATATTCAAAAAGAGAATTTTTTCACAAATGGTATATCAGTTACCCCAAGTGTTCCTGTGGAACAAGCCAGTGAGACGTCCCTATTGAATGTTGTAAGAGATTTGGGTTTTCCCTTCTCCTTGAAGTCGAGGACTTTGTTATATTATGGAAAAGGTAACTTCGCTGTAAAGAATAAGGAAATGATTCCGGAGACTTTGTAAATACTGAAAGATCCTCCTTTTGTACGCCGAAAA"

correct_sequence3 <- "ATGGATTCTAGAACAGTTGGTATATTAGGAGGGGGATAATTGTGACGTATGATTGTTGAGTAAGCAAACAGGCTCAACATTAAGACGGTAATACTAGATGCTGAAAATTCTCC"

# Regression check for example 3, reusing the shared boundary motifs
# (evaluates to TRUE on success).
TimTimTrimTrim(
  Subject = test_fasta3,
  L_boundary = L_boundary,
  R_boundary = R_boundary
) == DNAString(x = correct_sequence3)
[1] TRUE
LS0tCnRpdGxlOiAiVGltVGltVHJpbVRyaW0iCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCmBgYHtyfQpsaWJyYXJ5KEJpb2NNYW5hZ2VyKQpsaWJyYXJ5KEJpb3N0cmluZ3MpCmBgYAoKVGVzdCB2YWx1ZXMKYGBge3J9CnRlc3RfZmFzdGEgPC0gIk5OR0NBVEdDR0FUR0FDVENUVEdDVEdBQ0NUVFRUQVRUQUFHQUFDVEFBQVRHR0FDQUFUQVRUQVRHR0FHQ0FUVFRDQVRHVEFUQUFBVFRHR1RHQ0dUQUFBQVRDR1RUR0dBVENUQ1RDVFRDVEFBR1RBQ0FUQ0NUQUNUQVRBQUNBQVRDQUFHQUFBQUFDQUFHQUFBQVRDR0dBQ0FBQUFDQUFUQ0FBR1RBVEdHQVRUQ1RBR0FBQ0FHVFRHR1RBVEFUVEFHR0FHR0dHR0FDQUFUVEdHR0FDR1RBVEdBVFRHVFRHQUdUQUFHQ1RBQUNBR0dDVENBQUNBVFRBQUdBQ0dHVEFBVEFDVEFHQVRHQ1RHQUFBQVRUQ1RDQ1RHQ0NBQUFDQUFBVEFBR0NBQUNUQ0NBQVRHQUNDQUNHVFRBQVRHR0NUQ0NUVFRUQ0NBQVRDQ1RDVFRHQVRBVENHQUFBQUFDVEFHQ1RHQUFBQUFUR1RHQVRHVEdDVEFBQ0dBVFRHQUdBVFRHQUdDQVRHVFRHQVRHVFRDQ1RBQ0FDVEFBQUdBQVRDVFRDQUFHVEFBQUFDQVRDQ0NBQUFUVEFBQUFBVFRUQUNDQ1RUQ1RDQ0FHQUFBQ0FBVENBR0FUVEdBVEFDQUFHQUNBQUFUQVRBVFRDQUFBQUFHQUdDQVRUVEFBVENBQUFBQVRHR1RBVEFHQ0FHVFRBQ0NDQUFBR1RHVFRDQ1RHVEdHQUFDQUFHQ0NBR1RHQUdBQ0dUQ0NDVEFUVEdBQVRHVFRHR0FBR0FHQVRUVEdHR1RUVFRDQ0FUVENHVENUVEdBQUdUQ0dBR0dBQ1RUVEdHQ0FUQUNHQVRHR0FBR0FHR1RBQUNDR0NUVFRUR1RHQUFBQUdBR0FBR0FHVENBVFRUR1RDQUdDQVRBR0NUR1RBQVRBQVRDQUFUQ0FUR0FDR1RBQUdBQUFUR1RBVENBVEFBVFRBQUFBR1RUR1RUQUFBR0FUR1RDQUdUR1RUQVRHVFRHR1RHVFRBQ0FBQUFUVENUQ0dHQ1RUQ1RDQUNUQUFUQVRUVEFBVEFUQ1RDVFRBQUFUVFRUQVRDVEdUQ1RUVEdBVFRDVFRUVEFBR0FBQUFHVFRBVEdUQVRUQVRUQ0FBR0FBQUFBR1RDQUFUVENDR0NBQVRDQ0FBQUFHQUFHRyIKCmNvcnJlY3Rfc2VxdWVuY2UgPC0gIkFUR0dBVFRDVEFHQUFDQUdUVEdHVEFUQVRUQUdHQUdHR0dHQUNBQVRUR0dHQUNHVEFUR0FUVEdUVEdBR1RBQUdDVEFBQ0FHR0NUQ0FBQ0FUVEFBR0FDR0dUQUFUQUNUQUdBVEdDVEdBQUFBVFRDVENDIgoKTF9ib3VuZGFyeSA8LSAiQVRHR0FUVENUQUdBQUMiClJfYm91bmRhcnkgPC0gIkdDVEdBQUFBVFRDVENDIgpgYGAKClRlc3QgYmFzaWMgY29uY2VwdApgYGB7cn0KdGVzdF9kbmEgPC0gRE5BU3RyaW5nKHRlc3RfZmFzdGEpICMgY29udmVydCB0byBCaW9zdHJpbmdzOjpETkFTdHJpbmcgb2JqZWN0CkxfbWF0Y2ggPC0gbWF0Y2hQYXR0ZXJuKExfYm91bmRhcnksIHRlc3RfZG5hKSAjIHJhbmdlIG9mIGxlZnQgbWF0Y2gKUl9tYXRjaCA8LSBtYXRjaFBhdHRlcm4oUl9ib3VuZGFyeSwgdGVzdF9kbmEpICMgcmFuZ2Ugb2YgcmlnaHQgbWF0Y2gKCiMgZG9lcyBpdCB3b3JrCnRlc3RfZG5hW3N0YXJ0KExfbWF0Y2gp
OiBlbmQoUl9tYXRjaCldID09IEROQVN0cmluZyhjb3JyZWN0X3NlcXVlbmNlKSAKCmBgYAoKRW5jYXBzdWxhdGUKYGBge3J9ClRpbVRpbVRyaW1UcmltIDwtIGZ1bmN0aW9uKFN1YmplY3QsIExfYm91bmRhcnk9IiIsIFJfYm91bmRhcnk9IiIpeyAjIGlucHV0IGFzIHN0cmluZ3MKICBkbmFfb2JqIDwtIEROQVN0cmluZyhTdWJqZWN0KQogIAogIExfbWF0Y2ggPC0gbWF0Y2hQYXR0ZXJuKExfYm91bmRhcnksIGRuYV9vYmopCiAgUl9tYXRjaCA8LSBtYXRjaFBhdHRlcm4oUl9ib3VuZGFyeSwgZG5hX29iaikKICAKICByZXR1cm4oZG5hX29ialtzdGFydChMX21hdGNoKTplbmQoUl9tYXRjaCldKSAjIHJldHVybiBCaW9zdHJpbmcgb2JqZWN0Cn0KCiMgRG9lcyBpdCBzdGlsbCB3b3JrCkROQVN0cmluZyhjb3JyZWN0X3NlcXVlbmNlKSA9PSBUaW1UaW1UcmltVHJpbSh0ZXN0X2Zhc3RhLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIExfYm91bmRhcnk9IkFUR0dBVFRDVEFHQUFDIiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBSX2JvdW5kYXJ5PSJHQ1RHQUFBQVRUQ1RDQyIpCmBgYAoKVGVzdCBFeGFtcGxlIDIKYGBge3J9CnRlc3RfZmFzdGEyIDwtICJOTkdDQVRHQ0dBVEdBQ1RDVFRHQ1RHQUNDVFRUVEFUVEFBR0FBQ1RBQUFUR0dBQ0FBVEFUVEFUR0dBR0NBVFRUQ0FUR1RBVEFBQVRUR0dUR0NHVEFBQUFUQ0dUVEdHQVRDVENUQ1RUQ1RBQUdUQUNBVENDVEFDVEFUQUFDQUFUQ0FBR0FBQUFBQ0FBR0FBQUFUQ0dHQUNBQUFBQ0FBVENBQUdUQVRHR0FUVENUQUdBQUNBR1RUR0dUQVRBVFRBR0dBR0dHR0dBQ0FBVFRHR0dBQ0dUQVRHQVRUR1RUR0FHVEFBR0NUQUFDQUdHQ1RDQUFDQVRUQUFHQUNHR1RBQVRBQ1RBR0FUR0NUR0FBQUFUVENUQ0NUR0NDQUFBQ0FBQVRBQUdDQUFDVENDQUFUR0FDQ0FDR1RUQUFUR0dDVENDVFRUVENDQUFUQ0NUQ1RUR0FUQVRDR0FBQUFBQ1RBR0NUR0FBQUFBVEdUR0FUR1RHQ1RBQUNHQVRUR0FHQVRUR0FHQ0FUR1RUR0FUR1RUQ0NUQUNBQ1RBQUFHQUFUQ1RUQ0FBR1RBQUFBQ0FUQ0NDQUFBVFRBQUFBQVRUVEFDQ0NUVENUQ0NBR0FBQUNBQVRDQUdBVFRHQVRBQ0FBR0FDQUFBVEFUQVRUQ0FBQUFBR0FHQ0FUVFRBQVRDQUFBQUFUR0dUQVRBR0NBR1RUQUNDQ0FBQUdUR1RUQ0NUR1RHR0FBQ0FBR0NDQUdUR0FHQUNHVENDQ1RBVFRHQUFUR1RUR0dBQUdBR0FUVFRHR0dUVFRUQ0NBVFRDR1RDVFRHQUFHVENHQUdHQUNUVFRHR0NBVEFDR0FUR0dBQUdBR0dUQUFDQ0dDVFRUVEdUR0FBQUFHQUdBQUdBR1RDQVRUVEdUQ0FHQ0FUQUdDVEdUQUFUQUFUQ0FBVENBVEdBQ0dUQUFHQUFBVEdUQVRDQVRBQVRUQUFBQUdUVEdUVEFBQUdBVEdUQ0FHVEdUVEFUR1RUR0dUR1RUQUNBQUFBVFRDVENHR0NUVENUQ0FDVEFBVEFUVFRBQVRBVENUQ1RUQUFBVFRUVEFUQ1RHVENUVFRHQVRUQ1RUVFRBQUdBQUFBR1RUQVRHVEFUVEFUVENBQUdBQUFBQUdUQ0FBVFRDQ0dD
QUFUQ0NBQUFBR0FBR0ciCmNvcnJlY3Rfc2VxdWVuY2UyIDwtICJBVEdHQVRUQ1RBR0FBQ0FHVFRHR1RBVEFUVEFHR0FHR0dHR0FDQUFUVEdHR0FDR1RBVEdBVFRHVFRHQUdUQUFHQ1RBQUNBR0dDVENBQUNBVFRBQUdBQ0dHVEFBVEFDVEFHQVRHQ1RHQUFBQVRUQ1RDQyIKCkROQVN0cmluZyhjb3JyZWN0X3NlcXVlbmNlMikgPT0gVGltVGltVHJpbVRyaW0odGVzdF9mYXN0YTIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgTF9ib3VuZGFyeT1MX2JvdW5kYXJ5LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIFJfYm91bmRhcnk9Ul9ib3VuZGFyeSkKYGBgCgpUZXN0IEV4YW1wbGUgMwpgYGB7cn0KdGVzdF9mYXN0YTMgPC0gIk5DVEFUQUFBR1RUQ0FBR0NUR1RBQ1RDR1RUQVRHR0FHQ0FUVFRDQVRHVEFUQUFBVFRHR1RHQ0dUQUFBQVRDR1RUR0dBVENUQ1RDVFRDVEFBR1RBQ0FUQ0NUQUNUQVRBQUNBQVRDQUFHQUFBQUFDQUFHQUFBQVRDR0dBQ0FBQUFDQUFUQ0FBR1RBVEdHQVRUQ1RBR0FBQ0FHVFRHR1RBVEFUVEFHR0FHR0dHR0FUQUFUVEdUR0FDR1RBVEdBVFRHVFRHQUdUQUFHQ0FBQUNBR0dDVENBQUNBVFRBQUdBQ0dHVEFBVEFDVEFHQVRHQ1RHQUFBQVRUQ1RDQ1RHQ0NBQUFDQUFBVEFBR0NBQUNUQ0NDQVRHQUNDQUNHVFRBQVRHR0NUQ0NUVFRUQ0NBQVRDQ1RDVFRHQVRBVENHQUFBQUFDVEFBQ1RHQUFBQUFUR1RHQVRHVEdDVEFBQ0dBVFRHQUNBVFRHQUdDQVRHVFRHQVRHVFRDQ1RBQ0FDVEFBQUdBQVRDVFRDQUNHVEFBQUFDQVRDQ0NBQUFUVEFBQUFBVFRUQUNDQ1RUQ1RDQ0NDQUFBQ0FBVENBQ0FUVEdBVEFDQUFHQUNBQUFUQVRBVFRDQUFBQUFHQUdBQVRUVFRUVENBQ0FBQVRHR1RBVEFUQ0FHVFRBQ0NDQ0FBR1RHVFRDQ1RHVEdHQUFDQUFHQ0NBR1RHQUdBQ0dUQ0NDVEFUVEdBQVRHVFRHVEFBR0FHQVRUVEdHR1RUVFRDQ0NUVENUQ0NUVEdBQUdUQ0dBR0dBQ1RUVEdUVEFUQVRUQVRHR0FBQUFHR1RBQUNUVENHQ1RHVEFBQUdBQVRBQUdHQUFBVEdBVFRDQ0dHQUdBQ1RUVEdUQUFBVEFDVEdBQUFHQVRDQ1RDQ1RUVFRHVEFDR0NDR0FBQUEiCgpjb3JyZWN0X3NlcXVlbmNlMyA8LSAiQVRHR0FUVENUQUdBQUNBR1RUR0dUQVRBVFRBR0dBR0dHR0dBVEFBVFRHVEdBQ0dUQVRHQVRUR1RUR0FHVEFBR0NBQUFDQUdHQ1RDQUFDQVRUQUFHQUNHR1RBQVRBQ1RBR0FUR0NUR0FBQUFUVENUQ0MiCgpETkFTdHJpbmcoY29ycmVjdF9zZXF1ZW5jZTMpID09IFRpbVRpbVRyaW1UcmltKHRlc3RfZmFzdGEzLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIExfYm91bmRhcnk9TF9ib3VuZGFyeSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBSX2JvdW5kYXJ5PVJfYm91bmRhcnkpCmBgYAoK