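# Diff of /scripts/template1.R [000000] .. [c09aa8]
#
# Exported from the repository's diff viewer (file: scripts/template1.R).
# Bare integers that appear on their own lines below are the viewer's
# line-number gutter, not part of the script.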
1
require(data.table)
2
3
#' Column-bind tables of different heights, padding the short ones with NA.
#'
#' Every argument is extended with rows of `NA` until it has as many rows as
#' the tallest argument, then all arguments are combined with `cbind()`.
#'
#' @param ... Data frames or matrices (anything `nrow()`/`ncol()` work on).
#' @return The result of `cbind()` over the padded arguments, or `NULL` when
#'   called with no arguments.
cbindPad <- function(...) {
  args <- list(...)
  if (length(args) == 0L) {
    # Nothing to bind; the previous code failed inside max() here.
    return(NULL)
  }
  # vapply() keeps the result a plain integer vector whatever the inputs are.
  n <- vapply(args, nrow, integer(1))
  mx <- max(n)
  pad <- function(x, mx) {
    missing_rows <- mx - nrow(x)
    if (missing_rows <= 0L) {
      return(x)
    }
    if (ncol(x) == 0) {
      # A table without columns contributes no cells, but it must still
      # report the full height so cbind() does not see mismatched row counts.
      return(matrix(NA, mx, 0))
    }
    padTemp <- matrix(NA, missing_rows, ncol(x))
    colnames(padTemp) <- colnames(x)
    rbind(x, padTemp)
  }
  rs <- lapply(args, pad, mx)
  do.call(cbind, rs)
}
25
26
# Cluster template extraction ----
#
# For every cluster file `clust_<n>.txt` the script takes the text in column 2,
# splits each row into tokens, looks for token runs of the shortest row that
# can be found in every row, and writes two CSV files per cluster:
#   template<n>.csv - the shared tokens with "BLANK" where rows differ
#   fillins<n>.csv  - per row, the text found in the gaps, each followed by
#                     a "Frequency" count
#
# The original script held this logic twice (once for cluster 3, once inside a
# loop up to 750). It is now one function that is called for every cluster.

# Directory holding the clust_<n>.txt inputs.
cluster_dir <- "/media/hdd0/unraiddisk1/student/neilm/primestmp/clusters"

# Directory the CSV results are written to.
# NOTE(review): the original loop (clusters 4..750) wrote to this Windows path,
# while the first pass for cluster 3 wrote next to the inputs. That first pass
# never ran, because its file name was only built when `numbo <= 0`. The loop's
# target is kept for all clusters - confirm which directory is intended.
output_dir <- "C:/Users/AI/Downloads/PipelineTestRun4PLAY"

# Inclusive range of cluster numbers to process.
first_cluster <- 3
last_cluster <- 750

#' Split one cluster text into tokens.
#'
#' Splits on every character that is not alphanumeric or one of `< > = . /`,
#' then splits again in front of each of those five symbols.
#'
#' @param text A character scalar.
#' @return Character vector of tokens (`NULL` when nothing is left).
tokenize_text <- function(text) {
  pieces <- unlist(strsplit(text, "[^[:alnum:]<>=./]"))
  unlist(strsplit(pieces, "(?=[<>=./])", perl = TRUE))
}

#' Flag marker entries that directly repeat the previous entry.
#'
#' @param x Character vector.
#' @param marker Placeholder value such as "IGNORE" or "BLANK".
#' @return Logical vector as long as `x`; `TRUE` where `x[i]` equals `marker`
#'   and also equals `x[i - 1]`. The first element is never flagged.
is_repeated_marker <- function(x, marker) {
  if (length(x) < 2L) {
    return(rep(FALSE, length(x)))
  }
  current <- x[-1L]
  previous <- x[-length(x)]
  c(FALSE, current == marker & current == previous)
}

#' Derive the template row and the fill-in table for one cluster.
#'
#' @param b One-column data frame of lower-cased cluster texts.
#' @return A list with `template` (one-row data frame of shared tokens and
#'   "BLANK" placeholders) and `fillins` (data frame alternating "Blank k"
#'   text columns and "Frequency" count columns). When no token run is shared
#'   by all rows, both are a 1x1 data frame holding "".
extract_template <- function(b) {
  # Token table ----
  # One row per text, padded on the right with "IGNORE" to a common width.
  tokens <- lapply(b[[1]], tokenize_text)
  token_counts <- lengths(tokens)
  width <- max(1L, token_counts)
  l <- as.data.frame(
    do.call(
      rbind,
      lapply(tokens, function(tok) c(tok, rep("IGNORE", width - length(tok))))
    ),
    stringsAsFactors = FALSE
  )
  # Reference row: the first row needing the most padding (fewest tokens).
  row1 <- tokens[[which.max(width - token_counts)]]

  # Search for runs of row1 that occur in every row ----
  # Bookkeeping is kept exactly as in the original; the meanings below are
  # read off the code:
  #   n, k   - first/last index of the row1 run currently being tested
  #   g      - slot in `results` that receives the next accepted run
  #   s      - column of `d` that receives the end positions of that run
  #   listo  - per row, smallest end column at which the run matched
  #   p      - number of accepted matches in the current pass
  #   check  - counts matches of a run starting at row1[1] that begin after
  #            column 1 (later adds a leading placeholder)
  xmax <- .Machine$double.xmax
  d <- as.data.frame(
    matrix(xmax, ncol = ncol(l), nrow = nrow(l)),
    stringsAsFactors = FALSE
  )
  listo <- rep(xmax, times = nrow(l))
  results <- rep("IGNORE", times = ncol(l))
  n <- 1
  k <- 1
  g <- 1
  p <- 0
  s <- 1
  check <- 0
  ignorecheck <- 0
  switch01 <- 0
  while (n <= length(row1)) {
    while (k <= length(row1)) {
      p <- 0
      target <- paste(row1[n:k], collapse = " ")
      for (i in seq_len(nrow(l))) {
        for (v in seq_len(ncol(l))) {
          for (w in seq_len(ncol(l))) {
            if (paste(l[i, v:w], collapse = " ") != target) {
              next
            }
            # After the first accepted run, a match must start beyond the
            # end position recorded for the previous run in this row.
            if (s > 1 && !(v > d[i, s - 1])) {
              next
            }
            if (w < listo[i]) {
              p <- p + 1
              listo[i] <- w
              if (s == 1 && n == 1 && v > 1) {
                check <- check + 1
              }
            }
          }
        }
      }
      if (p >= nrow(b)) {
        # Accepted: remember the run and try to extend it by one token.
        results[g] <- paste(row1[n:k], collapse = " ")
        if (k == length(row1)) {
          n <- k
        }
        if (k < length(row1)) {
          k <- k + 1
        }
        d[, s] <- listo
        listo <- rep(xmax, times = nrow(l))
        ignorecheck <- ignorecheck + 1
        switch01 <- 1
      }
      if (p < nrow(b)) {
        if (n < length(row1)) {
          # Rejected: restart the run after (or at) the failing token.
          if (switch01 == 0) {
            n <- k + 1
            k <- n
          }
          if (switch01 == 1) {
            n <- k
          }
          g <- g + 2
          if (d[1, s] != xmax) {
            s <- s + 1
          }
          listo <- rep(xmax, times = nrow(l))
          switch01 <- 0
        }
      }
      if (n == length(row1)) {
        n <- n + 1
        k <- k + 1
      }
    }
  }

  if (ignorecheck == 0) {
    # No run was shared by all rows.
    blank <- as.data.frame(matrix("", ncol = 1, nrow = 1))
    return(list(template = blank, fillins = blank))
  }

  # Template token sequence ----
  pieces <- unlist(strsplit(results, " "))
  pieces <- pieces[!is.na(pieces)]
  pieces <- pieces[!is_repeated_marker(pieces, "IGNORE")]
  igcheck <- 0
  if (length(pieces) < ncol(l)) {
    pieces <- c(pieces, rep("IGNORE", ncol(l) - length(pieces)))
    igcheck <- 1
  }
  if (check > 0) {
    pieces <- c("IGNORE", pieces)
  }
  seq_tokens <- pieces[seq_len(ncol(l))]

  # Fixed segments: the text between placeholders, with spaces removed.
  collapsed <- seq_tokens[!is_repeated_marker(seq_tokens, "IGNORE")]
  segments <- strsplit(paste(collapsed, collapse = " "), "IGNORE")[[1]]
  segments <- segments[segments != " " & segments != ""]
  segments <- gsub(" ", "", segments)

  # Locate each fixed segment in every row ----
  # non2 stores, per row, consecutive (start, end) column pairs; 0 = unused.
  # d2 marks a segment as handled for a row once its first match was seen.
  non2 <- as.data.frame(
    matrix(0, ncol = ncol(l), nrow = nrow(l)),
    stringsAsFactors = FALSE
  )
  d2 <- as.data.frame(
    matrix("f", ncol = length(segments), nrow = nrow(l)),
    stringsAsFactors = FALSE
  )
  for (j in seq_len(nrow(l))) {
    g <- 1
    for (i in seq_along(segments)) {
      for (k in seq_len(ncol(l))) {
        for (m in seq_len(ncol(l))) {
          if (d2[j, i] == "f" &&
              gsub(" ", "", paste(l[j, k:m], collapse = " ")) == segments[i]) {
            d2[j, i] <- "t"
            if (g == 1 ||
                (k > as.numeric(non2[j, g - 1]) &&
                 m > as.numeric(non2[j, g - 1]))) {
              non2[j, g] <- k
              non2[j, g + 1] <- m
              g <- g + 2
            }
          }
        }
      }
    }
  }

  # Collect the text in the gaps around the fixed segments ----
  non3 <- as.data.frame(
    matrix("IGNORE", ncol = ncol(l), nrow = nrow(l)),
    stringsAsFactors = FALSE
  )
  g <- rep(1, times = nrow(non2))
  for (m in seq_len(nrow(non2))) {
    q <- as.character(non2[m, ])
    q <- as.numeric(q[q != "0" & q != ""])
    nq <- length(q)
    for (i in seq_len(nq)) {
      if (i == 1) {
        # Text in front of the first segment.
        if (q[i] - 1 >= 1) {
          non3[m, g[m]] <- paste(
            as.character(l[m, 1:(q[i] - 1)]),
            collapse = " "
          )
          g[m] <- g[m] + 1
        } else {
          g[m] <- 2
        }
      }
      if (i %% 2 == 0) {
        if (i != nq) {
          # Gap between this segment's end and the next segment's start;
          # the slot advances even when the gap is empty.
          if ((q[i + 1] - 1) >= (q[i] + 1)) {
            non3[m, g[m]] <- paste(
              as.character(l[m, (q[i] + 1):(q[i + 1] - 1)]),
              collapse = " "
            )
          }
          g[m] <- g[m] + 1
        } else if (q[i] + 1 <= ncol(l)) {
          # Text after the last segment.
          non3[m, g[m]] <- paste(
            as.character(l[m, (q[i] + 1):ncol(l)]),
            collapse = " "
          )
          g[m] <- g[m] + 1
        }
      }
    }
  }

  # Drop gap slots that no row filled.
  slot_values <- as.matrix(non3)
  all_ignore <- paste(rep("IGNORE", times = nrow(non3)), collapse = " ")
  keep_slot <- vapply(
    seq_len(ncol(slot_values)),
    function(j) paste(slot_values[, j], collapse = " ") != all_ignore,
    logical(1)
  )
  non3 <- as.data.frame(
    slot_values[, keep_slot, drop = FALSE],
    stringsAsFactors = FALSE
  )

  # Template row ----
  seq_tokens[seq_tokens == "IGNORE"] <- "BLANK"
  drop_token <- is_repeated_marker(seq_tokens, "BLANK")
  last <- length(seq_tokens)
  if (seq_tokens[last] == "BLANK" && igcheck == 1) {
    # A trailing placeholder that only exists because of padding is removed.
    drop_token[last] <- TRUE
  }
  template <- as.data.frame(
    t(seq_tokens[!drop_token]),
    stringsAsFactors = FALSE
  )
  rownames(template) <- "Template"

  # Fill-in table ----
  fillins <- non3
  if (ncol(non3) > 0) {
    gap_text <- lapply(non3, function(col) gsub("IGNORE", "", as.character(col)))
    n_rows <- nrow(non3)
    # Interleave every text column with a count column that starts at 1.
    columns <- vector("list", 2L * length(gap_text))
    for (j in seq_along(gap_text)) {
      columns[[2L * j - 1L]] <- gap_text[[j]]
      columns[[2L * j]] <- rep(1, times = n_rows)
    }
    names(columns) <- paste0("c", seq_along(columns))
    fillins <- as.data.frame(columns, stringsAsFactors = FALSE)
    names(fillins) <- as.vector(
      rbind(paste("Blank", seq_along(gap_text)), "Frequency")
    )

    # Fold duplicates: the first row with a given text keeps the count, later
    # rows with the same (trimmed) text are renamed "IGNORE <row>" with count 0.
    for (j in seq(1L, ncol(fillins), by = 2L)) {
      for (i in seq_len(nrow(fillins))) {
        for (k in seq_len(i - 1L)) {
          if (trimws(fillins[i, j], which = "both") ==
              trimws(fillins[k, j], which = "both")) {
            fillins[k, j + 1] <- fillins[k, j + 1] + 1
            fillins[i, j] <- paste("IGNORE", i, collapse = "")
            fillins[i, j + 1] <- 0
          }
        }
      }
    }
  }

  list(template = template, fillins = fillins)
}

#' Read one cluster file and write its template and fill-in CSV files.
#'
#' @param numbo Cluster number; selects `clust_<numbo>.txt`.
#' @param cluster_dir Directory containing the cluster files.
#' @param output_dir Directory receiving `template<numbo>.csv` and
#'   `fillins<numbo>.csv`.
#' @return Invisibly `TRUE` when the files were written, `FALSE` when the
#'   cluster file is missing or cannot be read (such clusters are skipped,
#'   as before).
process_cluster <- function(numbo, cluster_dir, output_dir) {
  in_path <- file.path(cluster_dir, paste0("clust_", numbo, ".txt"))
  if (!file.exists(in_path)) {
    return(invisible(FALSE))
  }
  res <- tryCatch(read.table(in_path), error = function(e) NULL)
  if (is.null(res)) {
    return(invisible(FALSE))
  }

  # Column 2 carries the cluster text; compare case-insensitively.
  b <- res[, 2, drop = FALSE]
  b[] <- lapply(b, as.character)
  b[] <- lapply(b, tolower)

  parts <- extract_template(b)

  # Append the raw cluster text as the last column of both outputs.
  template <- cbindPad(parts$template, b)
  fillins <- cbindPad(parts$fillins, b)
  names(template)[ncol(template)] <- "Cluster Text"
  names(fillins)[ncol(fillins)] <- "Cluster Text"

  write.csv(
    template,
    file = file.path(output_dir, paste0("template", numbo, ".csv")),
    row.names = FALSE
  )
  # The original passed `col.names = F` here, which write.csv() ignores with a
  # warning; the argument is dropped and the written file is unchanged.
  # NOTE(review): `row.names = FALSE` may have been intended - confirm.
  write.csv(
    fillins,
    file = file.path(output_dir, paste0("fillins", numbo, ".csv"))
  )
  invisible(TRUE)
}

for (numbo in first_cluster:last_cluster) {
  process_cluster(numbo, cluster_dir, output_dir)
}