|
a |
|
b/server.R |
|
|
1 |
# server.R |
|
|
2 |
library(shiny) |
|
|
3 |
|
|
|
4 |
Trim <- function( x ) {
  # Remove leading and trailing whitespace from every element of x.
  # Approach taken from:
  # http://stackoverflow.com/questions/2261079/how-to-trim-leading-and-trailing-whitespace-in-r
  edge_whitespace <- "(^[[:space:]]+|[[:space:]]+$)"
  gsub(pattern = edge_whitespace, replacement = "", x = x)
}
|
|
8 |
# Load the n-gram data set once at start-up. The CSV is read with a header row
# and its `x` column is kept as a plain character vector, one n-gram per
# element.
# NOTE(review): the path is hard-coded to a local Windows directory; confirm it
# exists on the machine that serves the app.
all_ngrams <- read.table('c:/cordova/pubmed_cardiology_ngrams.csv', sep=",", header=TRUE)
all_ngrams <- as.character(all_ngrams$x)
|
|
12 |
|
|
|
13 |
# Define the server logic: suggest the next word for the text typed so far.
shinyServer(function(input, output) {

  # Backslash-escape regex metacharacters so that typed text is matched
  # literally (an unbalanced "(" or "[" would otherwise be an invalid pattern).
  escape_regex <- function(text) {
    gsub("([][{}()+*^$|\\\\?.])", "\\\\\\1", text, perl = TRUE)
  }

  # Suggest the word that follows `current_sentence` in the n-gram data.
  #
  # current_sentence: a single string holding the text typed so far.
  # Returns: a single string with the suggested next word, or "" when the
  #   input is empty/missing, no n-gram contains the phrase, or nothing
  #   follows the phrase in the chosen n-gram.
  #
  # Reads the file-level `all_ngrams` character vector and uses `Trim`.
  find_next_word <- function(current_sentence) {
    # NULL (widget not yet initialised), NA, or vector input: nothing to match.
    if (length(current_sentence) != 1L || is.na(current_sentence)) {
      return("")
    }

    phrase <- Trim(current_sentence)
    if (nchar(phrase) == 0) {
      return("")
    }

    # The trailing space forces the last typed word to be complete, and "\\<"
    # anchors the phrase at the start of a word.
    phrase <- paste0(phrase, " ")
    pattern <- paste0("\\<", escape_regex(phrase))

    # Position of the phrase inside every n-gram (-1 where it is absent).
    starts <- regexpr(pattern, all_ngrams)
    matched <- which(!is.na(starts) & starts > 0)

    # Didn't find a match, so return nothing.
    if (length(matched) == 0) {
      return("")
    }

    # Prefer the n-grams with the most text in front of the phrase; break ties
    # at random. Indexing through sample.int() avoids sample()'s special
    # handling of a length-one numeric vector.
    deepest <- matched[starts[matched] == max(starts[matched])]
    chosen <- deepest[sample.int(length(deepest), 1L)]

    # Keep the text after the matched phrase and return its first word.
    phrase_end <- starts[chosen] + attr(starts, "match.length")[chosen]
    remainder <- substring(all_ngrams[chosen], phrase_end)
    following <- strsplit(remainder, " ", fixed = TRUE)[[1]]
    following <- following[nzchar(following)]
    if (length(following) == 0) {
      return("")
    }
    following[[1]]
  }

  # Reactive suggestion for the `current_sentence` input.
  datasetInput <- reactive({
    find_next_word(tolower(input$current_sentence))
  })

  # Echo the lower-cased text of the `text` input followed by the suggestion.
  output$value <- renderPrint({
    typed <- tolower(input$text)
    paste(typed, find_next_word(typed))
  })
})