From 59e20749cd176ee35d460e3907d67ad1bbf704b9 Mon Sep 17 00:00:00 2001 From: leo Date: Mon, 14 Sep 2015 18:36:05 +0800 Subject: [PATCH] complete MP1 --- MP1.java | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/MP1.java b/MP1.java index 89dadee..a1d0e3a 100644 --- a/MP1.java +++ b/MP1.java @@ -1,5 +1,10 @@ +import java.io.BufferedReader; import java.io.File; -import java.lang.reflect.Array; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.Charset; +import java.nio.file.Files; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.*; @@ -51,8 +56,37 @@ public MP1(String userName, String inputFileName) { public String[] process() throws Exception { String[] ret = new String[20]; - //TODO + List lines = Files.readAllLines(new File(inputFileName).toPath(), Charset.forName("utf-8")); + Map wordDict = new LinkedHashMap(); + for (Integer index: Arrays.asList(getIndexes())) { + String line = lines.get(index); + StringTokenizer st = new StringTokenizer(line); + while (st.hasMoreTokens()) { + String token = st.nextToken(delimiters).toLowerCase().trim(); + if (!Arrays.asList(stopWordsArray).contains(token)) { + if ((wordDict.containsKey(token))){ + Integer val = wordDict.get(token); + wordDict.put(token, val+1); + } + else { + wordDict.put(token, 1); + } + } + } + } + List> wordList = new ArrayList>(wordDict.entrySet()); + Collections.sort(wordList, new Comparator>() { + public int compare(Map.Entry a, Map.Entry b) { + int i = a.getValue().compareTo(b.getValue()) * (-1); + if (i != 0) return i; + return a.getKey().compareTo(b.getKey()); + } + }); + + for (int i=0; i < 20; i++) { + ret[i] = wordList.get(i).getKey(); + } return ret; }