Diff of /scripts/ctrpscrape.java [000000] .. [c09aa8]

Switch to unified view

a b/scripts/ctrpscrape.java
1
import java.util.*;
2
import java.io.*;
3
import java.net.*;
4
/**
 * Command-line scraper that looks up each trial id from a CSV file against a
 * clinical-trials HTTP endpoint and writes the eligibility criteria it finds
 * to an output CSV.
 *
 * <p>Each output row has the form
 * {@code id,"description 1",flag1,"description 2",flag2,...}. A trial whose
 * response contains no usable {@code display_order} entry produces no row.
 *
 * <p>The response is not parsed as JSON. Fields are located with plain string
 * searches and fixed character offsets, so the extraction assumes the compact
 * layout {@code "description":"text"}} and {@code "inclusion_indicator":true}
 * (no whitespace around the colon, description last in its object).
 * NOTE(review): confirm this still matches what the endpoint returns.
 */
public class ctrpscrape {
  private static final String DEFAULT_INPUT = "fullctrpids.csv";
  private static final String DEFAULT_OUTPUT = "ctrpinds.csv";
  private static final String DEFAULT_ENDPOINT =
      "https://clinicaltrialsapi.cancer.gov/v1/clinical-trials?nct_id=";
  private static final String USER_AGENT =
      "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11";

  private static final String ENTRY_MARKER = "display_order";
  private static final String DESCRIPTION_KEY = "description";
  private static final String INDICATOR_KEY = "inclusion_indicator";
  /** Characters from the start of {@code description} to the value text: the key plus {@code ":"}. */
  private static final int DESCRIPTION_VALUE_OFFSET = 14;
  /** Characters from the start of {@code inclusion_indicator} to the value: the key plus {@code ":}. */
  private static final int INDICATOR_VALUE_OFFSET = 21;

  private static final int CONNECT_TIMEOUT_MS = 30_000;
  private static final int READ_TIMEOUT_MS = 60_000;

  /**
   * Reads trial ids (one per line, after a header line) and writes one CSV row
   * per trial for which criteria were found. A running counter is printed to
   * standard output before each request as a progress indicator.
   *
   * @param args optional overrides; any missing argument falls back to the
   *     original hard-coded value:
   *     {@code args[0]} input CSV path (default {@code fullctrpids.csv}),
   *     {@code args[1]} output CSV path (default {@code ctrpinds.csv}),
   *     {@code args[2]} URL prefix to which the URL-encoded id is appended
   *     (default the cancer.gov v1 {@code nct_id} query)
   * @throws Exception if the input cannot be read, the output cannot be
   *     written, or a request URL is malformed
   */
  public static void main(String[] args) throws Exception {
    String inputPath = argOrDefault(args, 0, DEFAULT_INPUT);
    String outputPath = argOrDefault(args, 1, DEFAULT_OUTPUT);
    String endpoint = argOrDefault(args, 2, DEFAULT_ENDPOINT);

    // try-with-resources so buffered rows are flushed even if a later id fails.
    try (Scanner in = new Scanner(new File(inputPath));
         PrintWriter out = new PrintWriter(new File(outputPath))) {
      // Discard the header line; an empty file simply yields an empty output.
      if (in.hasNextLine()) {
        in.nextLine();
      }
      int count = 0;
      while (in.hasNextLine()) {
        String id = in.nextLine().trim();
        if (id.isEmpty()) {
          continue; // blank line: nothing to look up
        }
        System.out.println(count);
        count++;
        String source = getURLSource(endpoint + URLEncoder.encode(id, "UTF-8"));
        String row = buildRow(id, source);
        if (row != null) {
          out.println(row);
        }
      }
      // PrintWriter never throws on write errors; surface them explicitly.
      if (out.checkError()) {
        throw new IOException("Failed writing to " + outputPath);
      }
    }
  }

  /** Returns {@code args[position]} when present and non-null, otherwise {@code fallback}. */
  private static String argOrDefault(String[] args, int position, String fallback) {
    if (args == null || args.length <= position || args[position] == null) {
      return fallback;
    }
    return args[position];
  }

  /**
   * Builds the CSV row for one trial from the raw response text, or returns
   * {@code null} when no criterion could be extracted. Entries whose fields
   * are missing or out of the expected order are skipped.
   */
  private static String buildRow(String id, String source) {
    StringBuilder criteria = new StringBuilder();
    int found = 0;
    // indexOf returns -1 when there is no further match; 0 is a valid hit.
    for (int marker = source.indexOf(ENTRY_MARKER);
         marker >= 0;
         marker = source.indexOf(ENTRY_MARKER, marker + 1)) {
      int descriptionAt = source.indexOf(DESCRIPTION_KEY, marker);
      int closingBrace = source.indexOf("}", marker);
      int indicatorAt = source.indexOf(INDICATOR_KEY, marker);
      if (descriptionAt < 0 || closingBrace < 0 || indicatorAt < 0) {
        continue;
      }
      int textStart = descriptionAt + DESCRIPTION_VALUE_OFFSET;
      int textEnd = closingBrace - 1; // drop the quote that precedes '}'
      int flagAt = indicatorAt + INDICATOR_VALUE_OFFSET;
      if (textStart > textEnd || flagAt >= source.length()) {
        continue;
      }
      String text = source.substring(textStart, textEnd)
          .replace("\"", "")
          .replace("\r\n", "")
          .replace("\n\r", "");
      // The value is taken to be true when it starts with 't'.
      boolean included = source.charAt(flagAt) == 't';
      if (found > 0) {
        criteria.append(',');
      }
      criteria.append('"').append(text).append("\",").append(included);
      found++;
    }
    return found == 0 ? null : id + "," + criteria;
  }

  /**
   * Fetches the content at {@code url} as a single string with line breaks
   * removed. This is best-effort: if the content cannot be read, a warning is
   * printed to standard error and the empty string is returned.
   *
   * @param url the address to fetch
   * @return the response text, or {@code ""} if reading it failed
   * @throws IOException if {@code url} is malformed or the connection object
   *     cannot be created
   */
  public static String getURLSource(String url) throws IOException {
    URL urlObject = new URL(url);
    URLConnection urlConnection = urlObject.openConnection();
    // Without timeouts a single stalled server would block the whole batch.
    urlConnection.setConnectTimeout(CONNECT_TIMEOUT_MS);
    urlConnection.setReadTimeout(READ_TIMEOUT_MS);
    urlConnection.setRequestProperty("User-Agent", USER_AGENT);
    try {
      return toString(urlConnection.getInputStream());
    } catch (IOException | RuntimeException e) {
      System.err.println("Warning: could not fetch " + url + ": " + e);
      return "";
    }
  }

  /**
   * Reads {@code inputStream} to the end as UTF-8 text and closes it. Lines
   * are concatenated without any separator.
   */
  private static String toString(InputStream inputStream) throws IOException {
    try (BufferedReader bufferedReader =
             new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))) {
      StringBuilder stringBuilder = new StringBuilder();
      String inputLine;
      while ((inputLine = bufferedReader.readLine()) != null) {
        stringBuilder.append(inputLine);
      }
      return stringBuilder.toString();
    }
  }
}