Switch to unified view

a b/scripts/ctsamplescrape.java
1
import java.util.*;
2
import java.io.*;
3
import java.net.*;
4
/**
 * Command-line scraper: reads {@code example_ct.csv}, downloads the page whose URL is in the
 * third comma-separated column of each data row, pulls out the text that follows every
 * {@code description} key located after a {@code display_order} marker, and writes one row per
 * input line to {@code ctsampleinds.csv}.
 *
 * <p>Output row format: {@code <col0>,<col1>,"['desc1','desc2',...]"}.
 */
public class ctsamplescrape {

  /** Marker whose every occurrence starts one entry to extract. */
  private static final String ORDER_KEY = "display_order";

  /** Key that precedes the text to extract. */
  private static final String DESCRIPTION_KEY = "description";

  /** Length of {@code description":"}: the key itself (11 chars) plus {@code ":"} (3 chars). */
  private static final int DESCRIPTION_VALUE_OFFSET = 14;

  /**
   * Runs the scrape over every data row of {@code example_ct.csv}.
   *
   * <p>The first line of the input is treated as a header and skipped. A running row counter is
   * printed to standard output as a progress indicator. Rows with fewer than three
   * comma-separated fields are skipped, as are rows whose page yields no descriptions.
   *
   * @param args ignored
   * @throws Exception if the input file cannot be opened, the output file cannot be created, or
   *     a row's URL is malformed
   */
  public static void main(String[] args) throws Exception {
    // try-with-resources so both files are closed (and the writer flushed) even when a row fails.
    try (Scanner in = new Scanner(new File("example_ct.csv"));
        PrintWriter out = new PrintWriter(new File("ctsampleinds.csv"))) {
      if (in.hasNextLine()) {
        in.nextLine(); // discard the header row
      }
      int count = 0;
      // hasNextLine() matches the nextLine() read below; hasNext() looks for tokens instead.
      while (in.hasNextLine()) {
        System.out.println(count);
        count++;
        String[] terms = in.nextLine().split(",");
        if (terms.length < 3) {
          continue; // blank or truncated row: there is no URL column to fetch
        }
        List<String> descriptions = extractDescriptions(getURLSource(terms[2]));
        if (!descriptions.isEmpty()) {
          out.println(formatRow(terms[0], terms[1], descriptions));
        }
      }
    }
  }

  /**
   * Extracts the description text belonging to each {@code display_order} marker in a page.
   *
   * <p>For every occurrence of {@code display_order}, the value starts 14 characters after the
   * next {@code description} and ends one character before the next {@code }} (dropping the
   * closing quote). Entries for which either delimiter is missing, or whose delimiters are out
   * of order, are skipped instead of raising an exception.
   *
   * <p>NOTE(review): this is plain substring matching, so a description that itself contains
   * {@code }} is cut short at that character.
   *
   * @param source page content to scan; may be empty
   * @return the extracted descriptions in page order; empty if none were found
   */
  static List<String> extractDescriptions(String source) {
    List<String> descriptions = new ArrayList<>();
    // indexOf signals "not found" with -1, so offset 0 is a legitimate match.
    for (int marker = source.indexOf(ORDER_KEY);
        marker >= 0;
        marker = source.indexOf(ORDER_KEY, marker + 1)) {
      int keyStart = source.indexOf(DESCRIPTION_KEY, marker);
      int objectEnd = source.indexOf("}", marker);
      if (keyStart < 0 || objectEnd < 0) {
        continue;
      }
      int from = keyStart + DESCRIPTION_VALUE_OFFSET;
      int to = objectEnd - 1; // exclude the quote that closes the value
      if (from <= to) {
        descriptions.add(source.substring(from, to));
      }
    }
    return descriptions;
  }

  /**
   * Builds one output row: the two leading columns followed by the descriptions rendered as a
   * double-quoted, bracketed, single-quoted list.
   *
   * <p>The closing {@code ]"} is emitted so that the quoted field is properly terminated.
   *
   * @param first value for the first output column
   * @param second value for the second output column
   * @param descriptions texts to place in the list column
   * @return the row text without a line terminator
   */
  static String formatRow(String first, String second, List<String> descriptions) {
    StringBuilder row = new StringBuilder();
    row.append(first).append(',').append(second).append(",\"[");
    for (int i = 0; i < descriptions.size(); i++) {
      if (i > 0) {
        row.append(',');
      }
      row.append('\'').append(descriptions.get(i)).append('\'');
    }
    return row.append("]\"").toString();
  }

  /**
   * Downloads the content at {@code url} using a browser-like {@code User-Agent} header.
   *
   * <p>This is best-effort: if reading the response fails, the failure is reported on standard
   * error and an empty string is returned so the caller can carry on with the next row.
   *
   * @param url address to fetch
   * @return the response body with line terminators removed, or {@code ""} if it could not be
   *     read
   * @throws IOException if {@code url} is malformed or the connection object cannot be created
   */
  public static String getURLSource(String url) throws IOException {
    URL urlObject = new URL(url);
    URLConnection urlConnection = urlObject.openConnection();
    urlConnection.setRequestProperty(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
    try {
      return toString(urlConnection.getInputStream());
    } catch (IOException | RuntimeException e) {
      // Deliberately non-fatal: one unreachable page must not abort the whole run.
      System.err.println("Could not read " + url + ": " + e);
      return "";
    }
  }

  /**
   * Reads the whole stream as UTF-8 text and closes it.
   *
   * <p>Lines are concatenated without their terminators, so the result is a single line.
   *
   * @param inputStream stream to drain; closed before this method returns
   * @return the decoded content with line terminators removed
   * @throws IOException if reading from the stream fails
   */
  private static String toString(InputStream inputStream) throws IOException {
    try (BufferedReader bufferedReader =
        new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))) {
      StringBuilder stringBuilder = new StringBuilder();
      String inputLine;
      while ((inputLine = bufferedReader.readLine()) != null) {
        stringBuilder.append(inputLine);
      }
      return stringBuilder.toString();
    }
  }
}