-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWordCounter.java
More file actions
119 lines (106 loc) · 4.16 KB
/
WordCounter.java
File metadata and controls
119 lines (106 loc) · 4.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package module7.parallelism.wordcounter;
import java.io.*;
import java.util.Arrays;
import java.util.regex.*;
/**
* Counts the number of times a word occurs in a body of text.
*
* @implNote Determines the {@link Runtime}'s number of available processors,
* then divides the text into that many equal chunks, which are split
* across multiple threads.
*/
public class WordCounter {
/**
* Using a regular expression, count every occurence of a word in a body of text
*
* @param text The body of text to search through
* @param word The word or phrase to search for
* @return
*/
public static int wordCount(String text, String word) {
Pattern pattern = Pattern.compile(word, Pattern.CASE_INSENSITIVE);
Matcher matcher = pattern.matcher(text);
int count = 0;
int i = 0;
while (matcher.find(i)) {
i = matcher.end();
count++;
}
return count;
}
/**
* @return The full text of the file located at {@code path}, or {@code null} if
* no such file exists.
*/
private static String readStringFromFile(String path) {
try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
String text = "";
while (reader.ready()) {
text += reader.readLine() + "\n";
}
return text;
} catch (IOException e) {
e.printStackTrace();
return null;
}
}
/**
* Divides a long string into {@code count} segments, each with an equal number
* of lines.
*/
private static String[] splitStringIntoChunks(String text, int count) {
String[] lines = text.split("\n"); // Split into lines to avoid breaking around a word.
int sliceLength = (int) Math.ceil(lines.length / count);
String[] chunks = new String[count];
for (int i = 0; i < count; i++) {
int from = i * sliceLength;
int to = Math.min((i + 1) * sliceLength, lines.length);
String[] sub = Arrays.copyOfRange(lines, from, to);
chunks[i] = String.join("\n", sub);
}
return chunks;
}
public static int wordCountParallel(String str, String word) {
final int THREAD_COUNT = Runtime.getRuntime().availableProcessors();
Thread[] threads = new Thread[THREAD_COUNT];
int[] results = new int[THREAD_COUNT];
String[] chunks = splitStringIntoChunks(str, THREAD_COUNT);
for (int i = 0; i < THREAD_COUNT; i++) {
final int index = i; // Lambda expression requires final variables
Thread t = new Thread(() -> results[index] = wordCount(chunks[index], word));
threads[i] = t;
t.start();
}
try {
for (int i = 0; i < threads.length; i++) {
threads[i].join();
}
} catch (InterruptedException e) {
e.printStackTrace();
}
int totalCount = 0;
for (int partialCount : results) {
totalCount += partialCount;
}
return totalCount;
}
/**
* Demonstrate this class's functionality by reading from the fulltext of Alice
* in Wonderland
*/
public static void main(String[] args) {
String file = "alice.txt";
String fullText = readStringFromFile(file);
System.out.println(fullText);
long parallelDuration = System.currentTimeMillis();
int parallelCount = wordCount(fullText, "the");
parallelDuration = System.currentTimeMillis() - parallelDuration;
System.out.printf("=== PARALLEL COUNT (%d THREADS) ===\n", Runtime.getRuntime().availableProcessors());
System.out.printf("%s has %d occurences of \"the\". (took %,d ms)\n", file, parallelCount, parallelDuration);
long serialDuration = System.currentTimeMillis();
int serialCount = wordCount(fullText, "the");
serialDuration = System.currentTimeMillis() - serialDuration;
System.out.println("=== SERIAL COUNT ===");
System.out.printf("%s has %d occurences of \"the\". (took %,d ms)\n", file, serialCount, serialDuration);
}
}