RuneHive-Game
Loading...
Searching...
No Matches
WikiTableParser.java
Go to the documentation of this file.
1package com.runehive.util.tools.wiki.parser;
2
3import com.google.common.util.concurrent.ListeningExecutorService;
4import com.google.common.util.concurrent.MoreExecutors;
5import com.google.common.util.concurrent.ThreadFactoryBuilder;
6import com.google.gson.Gson;
7import com.google.gson.GsonBuilder;
8import com.google.gson.JsonElement;
9import org.apache.logging.log4j.LogManager;
10import org.jsoup.Jsoup;
11import org.jsoup.nodes.Document;
12
13import java.io.IOException;
14import java.nio.file.Files;
15import java.nio.file.Path;
16import java.nio.file.Paths;
17import java.util.LinkedList;
18import java.util.concurrent.ExecutorService;
19import java.util.concurrent.Executors;
20import java.util.concurrent.TimeUnit;
21
22/**
23 * Parses a wiki page.
24 *
25 * @author Michael | Chex
26 */
27public abstract class WikiTableParser {
28
29 /** The path to the wiki dump json file location. */
30 private static final Path WRITE_PATH = Paths.get("./data/wiki");
31
32 /** The path to the wiki page address. */
33 protected static final String WIKI_LINK = "http://oldschoolrunescape.wikia.com/wiki/";
34
35 private static final org.apache.logging.log4j.Logger logger = LogManager.getLogger(WikiTableParser.class);
36
37 /** The gson object. */
38 protected static final Gson GSON = new GsonBuilder().setPrettyPrinting().disableHtmlEscaping().create();
39
40 /** A thread pool that will run tasks. */
41 private final ListeningExecutorService executorService;
42
43 /** The {@link WikiTable} list to parse. */
44 protected final LinkedList<WikiTable> tables;
45
46 /** Constructs a new {@code WikiTableParser} object. */
47 public WikiTableParser(LinkedList<WikiTable> tables) {
48 this.tables = tables;
49 ExecutorService delegateService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors(),
50 new ThreadFactoryBuilder().setNameFormat("WikiTableParserThread").build());
51 executorService = MoreExecutors.listeningDecorator(delegateService);
52 }
53
54 /** Begins parsing wiki tables for all the names provided. */
55 public void begin() throws InterruptedException {
56 int size = tables.size();
57 logger.info("Parsing " + size + " names from " + WIKI_LINK);
58
59 tables.forEach(table -> execute(new ParserTask(table)));
60
61 executorService.shutdown();
62 executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);
63
64 finish();
65 }
66
67 protected void execute(Runnable runnable) {
68 executorService.execute(runnable);
69 }
70
71 protected abstract void finish();
72
73 /**
74 * Writes the parsed wiki dump to a json file in {@code ./data/wiki} named
75 * as the {@code name} parameter.
76 *
77 * @param name the name of the json file to dump to
78 */
79 protected static void writeToJson(String name, JsonElement array) {
80 try {
81 String json = GSON.toJson(array);
82
83 if (!Files.exists(WRITE_PATH)) {
84 Files.createDirectory(WRITE_PATH);
85 }
86
87 Path path = WRITE_PATH.resolve(name += ".json");
88 Files.write(path, json.getBytes());
89
90 logger.info("Successfully wrote " + path);
91 } catch (IOException e) {
92 logger.warn("Could not save '" + name + "' to '" + WRITE_PATH + "'", e);
93 }
94 }
95
96 /**
97 * A task that parses a web page doccument using JSoup.
98 *
99 * @author Michael | Chex
100 */
101 private final class ParserTask implements Runnable {
102
103 /** The data to parse. */
104 private final WikiTable table;
105
106 /**
107 * Constructs a new {@link ParserTask} object for a {@link WikiTable}
108 * object.
109 *
110 * @param table the table to parse
111 */
113 this.table = table;
114 }
115
116 @Override
117 public void run() {
118 try {
119 String link = table.getLink();
120 Document document = Jsoup.connect(link).get();
121 table.parseDocument(document);
122 } catch (Exception e) {
123 System.out.println("Could not parse table from wiki for '" + table.getLink() + "'");
124// logger.error("Could not parse table from wiki for '" + table.getLink() + "'", e);
125 }
126 }
127 }
128
129}
Represents a single path in the path finding system.
Definition Path.java:13
A task that parses a web page document using JSoup.
ParserTask(WikiTable table)
Constructs a new ParserTask object for a WikiTable object.
static void writeToJson(String name, JsonElement array)
Writes the parsed wiki dump to a json file in ./data/wiki named as the name parameter.
static final String WIKI_LINK
The path to the wiki page address.
void begin()
Begins parsing wiki tables for all the names provided.
WikiTableParser(LinkedList< WikiTable > tables)
Constructs a new WikiTableParser object.
static final Path WRITE_PATH
The path to the wiki dump json file location.
static final org.apache.logging.log4j.Logger logger
final LinkedList< WikiTable > tables
The WikiTable list to parse.
final ListeningExecutorService executorService
A thread pool that will run tasks.