@@ -214,6 +227,19 @@
} else {
$('.div-streak-alg-controls').hide();
}
+
+/* if (alg === 'cosinesimilarity') {
+ $('.div-cosine-similarity').show();
+ } else {
+ $('.div-cosine-similarity').hide();
+ } */
+
+ if (alg === 'reviewalgo') {
+ $('.div-review-algo').show();
+ } else {
+ $('.div-review-algo').hide();
+ }
+
}
$('#algorithm').change(set_options_for_algorithm);
diff --git a/src/edu/tsinghua/dbgroup/EditDistanceClusterer.java b/src/edu/tsinghua/dbgroup/EditDistanceClusterer.java
index be03623..0985c80 100644
--- a/src/edu/tsinghua/dbgroup/EditDistanceClusterer.java
+++ b/src/edu/tsinghua/dbgroup/EditDistanceClusterer.java
@@ -10,6 +10,7 @@
import java.io.Serializable;
import java.util.Comparator;
import edu.tsinghua.dbgroup.*;
+
public class EditDistanceClusterer {
private final EditDistanceJoiner mJoiner;
static class SizeComparator implements Comparator> {
diff --git a/src/edu/tsinghua/dbgroup/EditDistanceJoiner.java b/src/edu/tsinghua/dbgroup/EditDistanceJoiner.java
index f69a6f7..5a7d016 100644
--- a/src/edu/tsinghua/dbgroup/EditDistanceJoiner.java
+++ b/src/edu/tsinghua/dbgroup/EditDistanceJoiner.java
@@ -4,6 +4,7 @@
import java.util.concurrent.*;
import edu.tsinghua.dbgroup.*;
+
class EditDistanceJoiner {
private List mStrings;
private final TreeMap>>> mGlobalIndex;
diff --git a/src/in/edu/ashoka/surf/CompatibleNameAlgorithm.java b/src/in/edu/ashoka/surf/CompatibleNameAlgorithm.java
index b7f8a2a..393b1ad 100755
--- a/src/in/edu/ashoka/surf/CompatibleNameAlgorithm.java
+++ b/src/in/edu/ashoka/surf/CompatibleNameAlgorithm.java
@@ -2,6 +2,7 @@
import com.google.common.collect.*;
import in.edu.ashoka.surf.util.Pair;
+import in.edu.ashoka.surf.util.Timers;
import in.edu.ashoka.surf.util.UnionFindSet;
import in.edu.ashoka.surf.util.Util;
import org.json.JSONArray;
@@ -342,9 +343,15 @@ public List> run() throws FileNotFoundException {
List filteredRows = filter.isEmpty() ? (List) new ArrayList<>(dataset.getRows()) : dataset.getRows().stream().filter(filter::passes).collect(toList());
+ Timers.CompatibleNameTimer.reset();
+ Timers.CompatibleNameTimer.start();
// now translate the row#s back to the actual rows
classes = new ArrayList<>();
runRecursive (classes, filteredRows, minTokenOverlap, substringAllowed, initialMapping);
+
+ Timers.CompatibleNameTimer.stop();
+
+ Timers.log.info ("Time for Compatible Name computation: " + Timers.CompatibleNameTimer.toString());
return classes;
}
diff --git a/src/in/edu/ashoka/surf/Config.java b/src/in/edu/ashoka/surf/Config.java
index b2f085c..7fd6639 100755
--- a/src/in/edu/ashoka/surf/Config.java
+++ b/src/in/edu/ashoka/surf/Config.java
@@ -28,6 +28,7 @@ public class Config {
public static String MERGE_FIELD = "Name";
public static final int groupsPerPage = 100;
public static final int DEFAULT_EDIT_DISTANCE = 2;
+// public static final int DEFAULT_COSINE_ACCURACY = 90;
public static final int DEFAULT_MIN_TOKEN_OVERLAP = 2;
public static final int DEFAULT_IGNORE_TOKEN_FREQUENCY = 200;
public static final int DEFAULT_MIN_SPLITWEIGHT = 10; // a token in a field will be split only if it's constituent parts have appeared independently > 10 times. (However, there is an additional factor of 2x needed if the fields are only of length 3)
diff --git a/src/in/edu/ashoka/surf/CosineFunc.java b/src/in/edu/ashoka/surf/CosineFunc.java
new file mode 100644
index 0000000..0008f6f
--- /dev/null
+++ b/src/in/edu/ashoka/surf/CosineFunc.java
@@ -0,0 +1,265 @@
+package in.edu.ashoka.surf;
+
+import static java.util.stream.Collectors.toList;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang3.text.WordUtils;
+
+import in.edu.ashoka.surf.Dataset;
+import in.edu.ashoka.surf.Row;
+
+class Node {
+ String name1;
+ String name2;
+ double cosinesimilarity;
+ int index;
+
+ public Node(String name1, String name2, double cosinesimilarity, int index) {
+ this.name1 = name1;
+ this.name2 = name2;
+ this.cosinesimilarity = cosinesimilarity;
+ this.index = index;
+ }
+
+ public String toString() {
+ return index + " " + name1 + " " + name2 + " " + cosinesimilarity;
+ }
+}
+
+class obj {
+
+ HashMap hash;
+ Set char_set;
+ double length;
+ String word;
+
+ public HashMap getHash() {
+ return hash;
+ }
+
+ public void setHash(HashMap hash) {
+ this.hash = hash;
+ }
+
+ public Set getChar_set() {
+ return char_set;
+ }
+
+ public void setChar_set(Set char_set) {
+ this.char_set = char_set;
+ }
+
+ public double getLength() {
+ return length;
+ }
+
+ public void setLength(int length) {
+ this.length = length;
+ }
+
+ public String getWord() {
+ return word;
+ }
+
+ public void setWord(String word) {
+ this.word = word;
+ }
+
+ public obj(HashMap hash, Set char_set, double length, String word) {
+ this.hash = hash;
+ this.char_set = char_set;
+ this.length = length;
+ this.word = word;
+ }
+
+}
+
+public class CosineFunc {
+
+ public static HashMap Count(String inputString) {
+ HashMap charCountMap = new HashMap();
+
+ char[] strArray = inputString.toCharArray();
+
+ for (char c : strArray) {
+ if (charCountMap.containsKey(c)) {
+
+ charCountMap.put(c, charCountMap.get(c) + 1);
+ } else {
+
+ charCountMap.put(c, 1);
+ }
+ }
+
+ return charCountMap;
+
+ }
+
+ public static obj word2vec(String word) {
+ HashMap count_characters = Count(word);
+// System.out.println(count_characters);
+ Set set_characters = count_characters.keySet();
+// System.out.println(set_characters);
+
+ double length = 0;
+ int key = 0;
+
+ for (Integer in : count_characters.values()) {
+ key += (in * in);
+ }
+ length = Math.sqrt(key);
+// System.out.println(length);
+
+ return new obj(count_characters, set_characters, length, word);
+
+ }
+
+ public static double cosine_similarity(obj vector1, obj vector2) {
+ Set common_characters = new HashSet(vector1.getChar_set()); // use the copy constructor
+ common_characters.retainAll(vector2.getChar_set());
+// System.out.println("Intersection = " + common_characters);
+
+ int product_summation = 0;
+ for (Character ch : common_characters) {
+ product_summation += vector1.getHash().get(ch) * vector2.getHash().get(ch);
+ }
+// System.out.println("product_summation = " + product_summation);
+
+ double length = vector1.length * vector2.length;
+// System.out.println("length = " + length);
+
+ if (length == 0) {
+ return 0;
+ } else {
+ return product_summation / length;
+ }
+
+ }
+
+ public List> assign_similarity(Collection filteredRows, String fieldName,double val) {
+
+// HashMap> map = new HashMap<>();
+ ArrayList names = new ArrayList<>();
+ List> resultx = new ArrayList>();
+ filteredRows.forEach(r -> names.add(r.get(fieldName)));
+// filteredRows.forEach(r -> map.put(r.get(fieldName), new ArrayList<>()));
+// ArrayList similar = new ArrayList<>();
+ boolean visited[] = new boolean[names.size()];
+
+// System.out.println("Map = " + map);
+
+// for (int i = 0; i < names.size(); i++) {
+// System.out.println(i + " " + names.get(i));
+// }
+ ArrayList aa = new ArrayList<>();
+ for (int i = 0; i < names.size(); i++) {
+ aa.add(word2vec(names.get(i)));
+ }
+
+ for (int i = 0; i < names.size(); i++) {
+ String one = names.get(i);
+ int task = 0;
+ Set curr = null;
+ if (visited[i] == false) {
+ task = 1;
+ curr = new LinkedHashSet();
+ visited[i] = true;
+ curr.add(one);
+ }
+// obj v1 = word2vec(one);
+ for (int j = i + 1; j < names.size(); j++) {
+ String two = names.get(j);
+// obj v2 = word2vec(two);
+ double cosine_val = cosine_similarity(aa.get(i),aa.get(j));
+// Node nn = new Node(one, two, cosine_similarity(word2vec(one), word2vec(two)), i);
+// similar.add(nn);
+
+ if (task == 1) {
+// System.out.println("hello");
+ if (cosine_val >= val && visited[j] == false) {
+// System.out.println("adi");
+ curr.add(two);
+ visited[j] = true;
+ }
+ }
+ }
+ if (task == 1) {
+ resultx.add(curr);
+ }
+ }
+// int l = 0;
+
+ return resultx;
+
+// System.out.println("gggggggggggggggggggggg");
+// for (int i = 0; i < resultx.size(); i++) {
+// l += resultx.get(i).size();
+// System.out.println(resultx.get(i));
+// }
+
+// System.out.println(l);
+
+// for (String name : map.keySet()) {
+// List list = map.get(name);
+// list.add(name);
+// map.put(name, list);
+// }
+
+// for (int i = 0; i < similar.size(); i++) {
+// Node node = similar.get(i);
+// if (node.cosinesimilarity > 1.0) {
+// List set1 = map.get(node.name1);
+// set1.add(node.name2);
+// map.put(node.name1, set1);
+//
+// List set2 = map.get(node.name2);
+// set2.add(node.name1);
+// map.put(node.name2, set2);
+// }
+// }
+// System.out.println("Map = " + map);
+//
+// Collection> result = map.values();
+//
+// System.out.println("result = " + result);
+// System.out.println(result.size());
+//
+// int le = 0;
+// for(List aa : result) {
+// System.out.println(aa);
+// le += aa.size();
+// }
+// System.out.println(le);
+
+// System.out.println(similar);
+
+// for (int i = 0; i < similar.size(); i++) {
+// System.out.println(similar.get(i));
+// }
+
+ }
+
+// public static void main(String[] args) throws IOException {
+// // TODO Auto-generated method stub
+//// String s1 = "adity a";
+//// String s2 = "aditya x";
+////
+//// System.out.println(cosine_similarity(word2vec(s1), word2vec(s2)));
+//
+// Dataset dataset = Dataset.getDataset(path);
+// String fieldName = "Candidate";
+// Collection filteredRows = dataset.getRows().stream().collect(toList());
+//
+// assign_similarity(filteredRows, fieldName);
+//
+// }
+
+}
\ No newline at end of file
diff --git a/src/in/edu/ashoka/surf/CosineSimilarityAlgo.java b/src/in/edu/ashoka/surf/CosineSimilarityAlgo.java
new file mode 100644
index 0000000..461cc21
--- /dev/null
+++ b/src/in/edu/ashoka/surf/CosineSimilarityAlgo.java
@@ -0,0 +1,80 @@
+package in.edu.ashoka.surf;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.SetMultimap;
+import edu.tsinghua.dbgroup.EditDistanceClusterer;
+import in.edu.ashoka.surf.util.Timers;
+import java.util.*;
+import java.util.stream.Collectors;
+import static java.util.stream.Collectors.toList;
+import java.io.IOException;
+
+public class CosineSimilarityAlgo extends MergeAlgorithm {
+
+ private final int inputval;
+ private final String fieldName; // field name on which cosine similarity is computed
+ private final Filter filter;
+
+ CosineSimilarityAlgo(Dataset dataset, String fieldName, int inputval, Filter filter) {
+ super (dataset);
+ this.filter = filter;
+ this.fieldName = fieldName;
+ this.inputval = inputval;
+ }
+
+ @Override
+ public List> run() {
+ Collection filteredRows = filter.isEmpty() ? dataset.getRows() : dataset.getRows().stream().filter(filter::passes).collect(toList());
+
+ SetMultimap fieldValueToRows = HashMultimap.create();
+ filteredRows.forEach(r -> fieldValueToRows.put(r.get(fieldName), r));
+
+// filteredRows.forEach(r -> System.out.println(r.get(fieldName)));
+ double acc = inputval/100.0;
+
+// System.out.println("--------------------------------------------------------------------" + acc);
+ CosineFunc func = new CosineFunc();
+
+ List> clusters;
+
+ Timers.cosineTimer.reset();
+ Timers.cosineTimer.start();
+
+ clusters = func.assign_similarity(filteredRows,fieldName,acc);
+
+ Timers.cosineTimer.stop();
+
+ System.out.println("--------------------------------------------------------The Time Taken is---------------------------------------------------------------");
+ Timers.log.info ("Time for cosine similarity computation: " + Timers.cosineTimer.toString());
+
+ int key = 0;
+ classes = new ArrayList<>();
+ for (Set cluster : clusters) {
+// System.out.println(key++ + " " + cluster);
+ final Collection rowsForThisCluster = new ArrayList<>();
+ // cluster just has strings, convert each string in the cluster to its rows, and
+ // add it to rowsForThisCluster
+ cluster.forEach(s -> {
+ rowsForThisCluster.addAll(fieldValueToRows.get(s));
+ });
+ classes.add(rowsForThisCluster);
+ }
+
+// System.out.println("--------------------------------------------------------------------");
+
+// classes.forEach(x -> System.out.println(x.toString()));
+
+ return classes;
+
+ }
+
+ /* debug method */
+ void dumpClasses() {
+ for (Collection rows: classes) {
+ log.info (rows.iterator().next().get(fieldName));
+ }
+ }
+
+ public String toString() { return "The cosine similarity algorithm works fine with inputval" + inputval; }
+
+}
\ No newline at end of file
diff --git a/src/in/edu/ashoka/surf/Dataset.java b/src/in/edu/ashoka/surf/Dataset.java
index c255533..b680c90 100755
--- a/src/in/edu/ashoka/surf/Dataset.java
+++ b/src/in/edu/ashoka/surf/Dataset.java
@@ -193,19 +193,31 @@ public void run() {
checkFilesForFailure(filename);
this.name = filename;
+
+// System.out.println("hellllllllllllllloooooooooooo");
Set allRows = new LinkedHashSet<>();
columnsToSave = new ArrayList<>();
int nRows = 0;
+ boolean isr = false;
// Reader in = new FileReader("GE.csv");
// read the names from CSV
Iterable records = CSVParser.parse(new File(filename), Charset.forName("UTF-8"), CSVFormat.EXCEL.withHeader());
for (CSVRecord record : records) {
+
nRows++;
Map map = record.toMap();
+
+ if(map.containsKey("Rid") == false) {
+ map.put("Rid", "0");
+ isr = true;
+ }
+
+// System.out.println(map);
if (nRows == 1) {
for (String col : map.keySet()) {
+// System.out.println(col);
columnsToSave.add(col);
registerColumn(col);
}
@@ -216,6 +228,11 @@ public void run() {
}
this.rows = allRows;
this.filename = filename;
+// System.out.println(columnsToSave);
+
+ if(isr == true) {
+ save();
+ }
}
private void checkFilesForFailure(String filename) throws IOException{
diff --git a/src/in/edu/ashoka/surf/EditDistanceMergeAlgorithm.java b/src/in/edu/ashoka/surf/EditDistanceMergeAlgorithm.java
index 09d670e..550c7b2 100644
--- a/src/in/edu/ashoka/surf/EditDistanceMergeAlgorithm.java
+++ b/src/in/edu/ashoka/surf/EditDistanceMergeAlgorithm.java
@@ -10,6 +10,8 @@
import static java.util.stream.Collectors.toList;
+import java.io.IOException;
+
/**
* Created by hangal on 8/12/17.
* New simplified edit distance merge manager.
@@ -35,9 +37,11 @@ public List> run() {
Collection filteredRows = filter.isEmpty() ? dataset.getRows() : dataset.getRows().stream().filter(filter::passes).collect(toList());
- // create map of fieldValueToRows
SetMultimap fieldValueToRows = HashMultimap.create();
filteredRows.forEach (r -> fieldValueToRows.put (r.get(fieldName), r));
+
+// System.out.println("--------------------------------------------------------------------");
+// filteredRows.forEach(r -> System.out.println(r.get(fieldName)));
// do the clustering based on ed (but only if ed > 0)
Timers.editDistanceTimer.reset();
@@ -69,11 +73,13 @@ public List> run() {
// compute the result of this algorithm
classes = new ArrayList<>();
for (Set cluster : clusters) {
+// System.out.println(cluster);
final Collection rowsForThisCluster = new ArrayList<>();
// cluster just has strings, convert each string in the cluster to its rows, and add it to rowsForThisCluster
cluster.forEach (s -> { rowsForThisCluster.addAll (fieldValueToRows.get(s)); });
classes.add (rowsForThisCluster);
}
+
return classes;
}
@@ -85,4 +91,13 @@ void dumpClasses() {
}
public String toString() { return "Edit distance algorithm with maximum edit distance " + maxEditDistance; }
+
+
+// public static void main(String args[]) throws IOException{
+// Dataset d = Dataset.getDataset("/Users/priyamgarrg21/Documents/Aditya/EX/TCPD_GE_Delhi_2020-6-18.csv");
+// Filter f = new Filter(null);
+// EditDistanceMergeAlgorithm aa = new EditDistanceMergeAlgorithm(d, "Candidate", 1, f);
+// aa.run();
+// }
+
}
\ No newline at end of file
diff --git a/src/in/edu/ashoka/surf/MergeManager.java b/src/in/edu/ashoka/surf/MergeManager.java
index 3c96ecb..18286a7 100755
--- a/src/in/edu/ashoka/surf/MergeManager.java
+++ b/src/in/edu/ashoka/surf/MergeManager.java
@@ -111,6 +111,7 @@ public String description() {
private final Multimap idToRows = LinkedHashMultimap.create();
private final SetMultimap rowToLabels = HashMultimap.create();
private int nextAvailableId = 0;
+ private int uniqueval = 0;
private final List allCommands = new ArrayList<>(); // compile all the commands, so that they can be replayed some day, if needed
@@ -170,6 +171,23 @@ public MergeManager(Dataset dataset, Map params) throws FileNotF
algorithm = new EditDistanceMergeAlgorithm(d, "_st_" + Config.MERGE_FIELD, editDistance, filter); // run e.d. on the _st_ version of the field
break;
+
+ case "reviewalgo":
+ algorithm = new NewReviewAlgorithm(d, Config.MERGE_FIELD, filter);
+ break;
+
+ case "cosinesimilarity":
+ int accuracy = 90;
+ try {
+ accuracy = Integer.parseInt(params.get("cosine-similarity"));
+ } catch (NumberFormatException e) {
+ Util.print_exception(e, log);
+ }
+// algorithm = new EditDistanceMergeAlgorithm(d, "_st_" + Config.MERGE_FIELD, 5, filter); // run e.d. on the _st_ version of the field
+ System.out.println("---------------------------------------------------------" + accuracy + "-------------------------------------------------------------------------");
+ algorithm = new CosineSimilarityAlgo(d, "_st_" + Config.MERGE_FIELD, accuracy, filter);
+ break;
+
case "allNames":
algorithm = new MergeAlgorithm(dataset) {
@Override
@@ -296,8 +314,19 @@ void updateMergesBasedOnIds() {
/* computes idToRows and also updates nextAvailableId */
private void computeIdToRows (Collection rows) {
- for (Row r: rows)
+// int i = 1;
+ for (Row r: rows) {
idToRows.put (r.get(Config.ID_FIELD), r);
+// System.out.println(r.get("Candidate") + "->" + i + "->" + r.get(Config.ID_FIELD));
+// i++;
+ }
+ int maxn = 0;
+ for(Row r: rows) {
+ if(Integer.parseInt(r.get("Rid")) >= maxn) {
+ maxn = Integer.parseInt(r.get("Rid"));
+ }
+ }
+ uniqueval = maxn + 1;
int maxNumberUsed = 1;
for (String id: idToRows.keySet()) {
@@ -329,15 +358,20 @@ public void applyUpdatesAndSave(Command[] commands) throws IOException {
firstId = id;
continue;
}
+// System.out.println(id);
+// System.out.println("aaaaaaaaaaaa");
// update all the rows for this id to firstId
// also remember to update the idToRows map
log.info("Merging id " + id + " into " + firstId);
Collection rowsForThisId = idToRows.get(id);
+// System.out.println(rowsForThisId);
+
if (rowsForThisId.size() == 0)
log.warn ("While trying to merge into id " + firstId + ", not found any rows for id: " + id);
for (Row row : rowsForThisId) {
+// System.out.println(row);
row.set(Config.ID_FIELD, firstId); // we wipe out the old id for this row
idToRows.get(firstId).add (row);
}
@@ -373,18 +407,69 @@ public void applyUpdatesAndSave(Command[] commands) throws IOException {
}
// create unique id's for all rows
for (String id : command.ids) {
+// System.out.println(id);
+ emptyRow = new Row(new LinkedHashMap<>(), toBeReviewed.rows.size(), toBeReviewed);
Collection rowsForThisId = idToRows.get(id);
if (rowsForThisId == null) {
log.warn ("rowsForThisID is null for id " + id);
continue;
}
+// System.out.println(rowsForThisId);
toBeReviewed.rows.addAll(rowsForThisId);
- toBeReviewed.rows.add(emptyRow); // add empty row with no data
+// toBeReviewed.rows.add(emptyRow); // add empty row with no data
}
-
- // add 2 empty rows with no data
+
+ emptyRow = new Row(new LinkedHashMap<>(), toBeReviewed.rows.size(), toBeReviewed);
+// System.out.println("a");
+ toBeReviewed.rows.add(emptyRow);
+// System.out.println("b");
toBeReviewed.rows.add(emptyRow);
+// System.out.println("c");
tbrNeedsToBeSaved = true;
+
+ System.out.println("--------------Review Algorithm Stats-----------");
+
+
+ int temp = 0;
+ for(String id: command.ids) {
+ Collection rowsForThisId = idToRows.get(id);
+// if (rowsForThisId == null) {
+// System.out.println("No Row for this ID");
+// continue;
+// }
+ for(Row row: rowsForThisId) {
+ if(Integer.parseInt(row.get("Rid")) >= temp) {
+ temp = Integer.parseInt(row.get("Rid"));
+ }
+ }
+ }
+
+ System.out.println("If existing rows are changed then temp!=0 otherwise it is 0:- "+ temp);
+ System.out.println("These rows will be given rid = "+ uniqueval);
+ if(temp != 0) {
+ for(String id: command.ids) {
+ Collection rowsForThisId = idToRows.get(id);
+ for(Row row: rowsForThisId) {
+ System.out.println("Candidate Name:- " + row.get("Candidate"));
+ row.set("Rid", Integer.toString(temp));
+ System.out.println("Its Given Rid:- " + row.get("Rid"));
+ }
+ }
+ }
+ else {
+ for(String id: command.ids) {
+ Collection rowsForThisId = idToRows.get(id);
+ for(Row row: rowsForThisId) {
+ System.out.println("Candidate Name:- " + row.get("Candidate"));
+ row.set("Rid", Integer.toString(uniqueval));
+ System.out.println("Its Given Rid:- " + row.get("Rid"));
+ }
+ }
+ uniqueval++;
+ }
+
+ d.save();
+
} else if ("add-label".equalsIgnoreCase(command.op)) {
String label = command.label;
for (String gid : command.ids) {
diff --git a/src/in/edu/ashoka/surf/NewReviewAlgorithm.java b/src/in/edu/ashoka/surf/NewReviewAlgorithm.java
new file mode 100644
index 0000000..d431663
--- /dev/null
+++ b/src/in/edu/ashoka/surf/NewReviewAlgorithm.java
@@ -0,0 +1,77 @@
+package in.edu.ashoka.surf;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.SetMultimap;
+import edu.tsinghua.dbgroup.EditDistanceClusterer;
+import in.edu.ashoka.surf.util.Timers;
+import java.util.*;
+import java.util.stream.Collectors;
+import static java.util.stream.Collectors.toList;
+import java.io.IOException;
+
+public class NewReviewAlgorithm extends MergeAlgorithm {
+
+ private final String fieldName; // field name used to group rows for review (comment previously copy-pasted from CosineSimilarityAlgo)
+ private final Filter filter;
+
+ NewReviewAlgorithm(Dataset dataset, String fieldName, Filter filter) {
+ super(dataset);
+ this.filter = filter;
+ this.fieldName = fieldName;
+ }
+
+ @Override
+ public List> run() {
+ Collection filteredRows = filter.isEmpty() ? dataset.getRows()
+ : dataset.getRows().stream().filter(filter::passes).collect(toList());
+
+ SetMultimap fieldValueToRows = HashMultimap.create();
+ filteredRows.forEach(r -> fieldValueToRows.put(r.get(fieldName), r));
+// filteredRows.forEach(r -> System.out.println(r.get(fieldName)));
+
+ Timers.ReviewTimer.reset();
+ Timers.ReviewTimer.start();
+
+ HashMap> map = new HashMap>();
+
+ for (Row row : filteredRows) {
+ int ridval = Integer.parseInt(row.get("Rid"));
+ if (ridval != 0) {
+ if (map.containsKey(ridval) == false) {
+ map.put(ridval, new ArrayList());
+ map.get(ridval).add(row);
+ } else {
+ map.get(ridval).add(row);
+ }
+ }
+ }
+
+ classes = new ArrayList<>();
+
+ for (Integer i : map.keySet()) {
+ final Collection rowsForThisCluster = map.get(i);
+ classes.add(rowsForThisCluster);
+ }
+
+ Timers.ReviewTimer.stop();
+ Timers.log.info ("TimeTaken by New Review Algo: " + Timers.ReviewTimer.toString());
+
+// System.out.println("--------------------------------------------------------------------");
+
+// classes.forEach(x -> System.out.println(x.toString()));
+
+ return classes;
+ }
+
+ /* debug method */
+ void dumpClasses() {
+ for (Collection rows : classes) {
+ log.info(rows.iterator().next().get(fieldName));
+ }
+ }
+
+ public String toString() {
+ return "The new review algo works fine";
+ }
+
+}
\ No newline at end of file
diff --git a/src/in/edu/ashoka/surf/ReviewAlgo.java b/src/in/edu/ashoka/surf/ReviewAlgo.java
new file mode 100644
index 0000000..9c6642a
--- /dev/null
+++ b/src/in/edu/ashoka/surf/ReviewAlgo.java
@@ -0,0 +1,88 @@
+package in.edu.ashoka.surf;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.SetMultimap;
+import edu.tsinghua.dbgroup.EditDistanceClusterer;
+import in.edu.ashoka.surf.util.Timers;
+import java.util.*;
+import java.util.stream.Collectors;
+import static java.util.stream.Collectors.toList;
+import java.io.IOException;
+
+public class ReviewAlgo extends MergeAlgorithm {
+
+ private final String fieldName; // field name used by this review algorithm, which adds data to the to-be-reviewed (TBR) file
+ private final Filter filter;
+
+ ReviewAlgo(Dataset dataset, String fieldName, Filter filter) {
+ super (dataset);
+ this.filter = filter;
+ this.fieldName = fieldName;
+ }
+
+ @Override
+ public List> run() {
+ Collection filteredRows = filter.isEmpty() ? dataset.getRows() : dataset.getRows().stream().filter(filter::passes).collect(toList());
+
+ SetMultimap fieldValueToRows = HashMultimap.create();
+ filteredRows.forEach(r -> fieldValueToRows.put(r.get(fieldName), r));
+
+// System.out.println("------------------------------------------------------------------------------------");
+// System.out.println(filteredRows.size());
+// filteredRows.forEach(r -> System.out.println(r.alldata()));
+// filteredRows.forEach(r -> System.out.println(r.get(fieldName)));
+// System.out.println("------------------------------------------------------------------------------------");
+
+ List> clusters = new ArrayList>();
+
+ Timers.cosineTimer.reset();
+ Timers.cosineTimer.start();
+
+ ArrayList namex = new ArrayList<>();
+ filteredRows.forEach(r -> namex.add(r.get(fieldName)));
+
+ int task = 1;
+ Set curr = null;
+ for (int i = 0; i < namex.size(); i++) {
+ if(task == 1) {
+ curr = new LinkedHashSet();
+ }
+ if(namex.get(i).length() != 0) {
+ curr.add(namex.get(i));
+ task = 0;
+ }
+ else {
+ task++;
+ if(i!=0 && task == 1) {
+ clusters.add(curr);
+ }
+ }
+ }
+
+ classes = new ArrayList<>();
+ for (Set cluster : clusters) {
+ final Collection rowsForThisCluster = new ArrayList<>();
+ cluster.forEach(s -> {
+ rowsForThisCluster.addAll(fieldValueToRows.get(s));
+ });
+ classes.add(rowsForThisCluster);
+ }
+
+
+ Timers.cosineTimer.stop();
+ Timers.log.info ("Time for Review Algo: " + Timers.cosineTimer.toString());
+
+ return classes;
+
+ }
+
+ /* debug method */
+ void dumpClasses() {
+ for (Collection rows: classes) {
+ log.info (rows.iterator().next().get(fieldName));
+ }
+ }
+
+ public String toString() { return "The Review Algo Works Fine";}
+
+}
diff --git a/src/in/edu/ashoka/surf/Row.java b/src/in/edu/ashoka/surf/Row.java
index 50427aa..007337b 100755
--- a/src/in/edu/ashoka/surf/Row.java
+++ b/src/in/edu/ashoka/surf/Row.java
@@ -77,6 +77,14 @@ public void set(String col, String val) {
public int nFields () {
return this.fields.keySet().size();
}
+
+ public String alldata() {
+ String ss = "";
+ for(String str : fields.keySet()) {
+ ss = ss + (" " + str + ":" + fields.get(str));
+ }
+ return ss;
+ }
public Set getAllFieldNames () {
return fields.keySet();
diff --git a/src/in/edu/ashoka/surf/Test2.java b/src/in/edu/ashoka/surf/Test2.java
index 6a03305..982ac84 100755
--- a/src/in/edu/ashoka/surf/Test2.java
+++ b/src/in/edu/ashoka/surf/Test2.java
@@ -11,7 +11,7 @@
class Test2 {
public static void main(String[] args) throws IOException {
- Dataset d = Dataset.getDataset("/home/sudx/surf.java/surf/GE/candidates/csv/candidates_info.csv");
+ Dataset d = Dataset.getDataset("/Users/priyamgarrg21/Documents/Aditya/EX/TCPD_GE_Delhi_2020-6-18.csv");
Collection rows = d.rows;
//set ups what toString() of Row needs to print
diff --git a/src/in/edu/ashoka/surf/newtest.java b/src/in/edu/ashoka/surf/newtest.java
new file mode 100644
index 0000000..ce5dacc
--- /dev/null
+++ b/src/in/edu/ashoka/surf/newtest.java
@@ -0,0 +1,85 @@
+package in.edu.ashoka.surf;
+
+import in.edu.ashoka.surf.*;
+import edu.tsinghua.dbgroup.EditDistanceClusterer;
+import in.edu.ashoka.surf.Dataset;
+import in.edu.ashoka.surf.Row;
+import in.edu.ashoka.surf.util.Timers;
+import static java.util.stream.Collectors.toList;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.SetMultimap;
+
+class Test1 {
+
+ private static final String path = "/Users/priyamgarrg21/Documents/Aditya/Internship@Ashoka/TCPD_GE_Delhi_2020-6-18.csv";
+
+ public static void main(String args[]) throws IOException {
+ Dataset dataset = Dataset.getDataset(path);
+ Filter filter = new Filter(null);
+ String fieldName = "Candidate";
+ int maxEditDistance = 1;
+ List> classes;
+
+// Set names = d.getRows().stream().map(r -> r.get("Candidate")).collect(Collectors.toSet());
+//
+// EditDistanceClusterer edc = new EditDistanceClusterer(5);
+// names.forEach(edc::populate);
+// List> clusters = (List) edc.getClusters();
+//
+// int i = 0;
+// for (Set cluster : clusters) {
+// System.out.println("Cluster " + i++ + " -------");
+// for (String s : cluster)
+// System.out.println(s);
+// }
+
+ Collection filteredRows = filter.isEmpty() ? dataset.getRows()
+ : dataset.getRows().stream().filter(filter::passes).collect(toList());
+
+ SetMultimap fieldValueToRows = HashMultimap.create();
+ filteredRows.forEach(r -> fieldValueToRows.put(r.get(fieldName), r));
+
+ // do the clustering based on ed (but only if ed > 0)
+
+ List> clusters;
+
+ if (maxEditDistance >= 1) {
+ final EditDistanceClusterer edc = new EditDistanceClusterer(maxEditDistance);
+ filteredRows.forEach(r -> edc.populate(r.get(fieldName)));
+ clusters = (List) edc.getClusters();
+ } else {
+ // handle the case when edit distance is 0 by creating a list of single-element
+ // sets with all unique fieldVal's
+ clusters = new ArrayList<>();
+ for (String fieldVal : fieldValueToRows.keySet()) {
+ // create a set with a single val
+ Set set = new LinkedHashSet();
+ set.add(fieldVal);
+ clusters.add(set);
+ }
+ }
+
+ // compute the result of this algorithm
+ classes = new ArrayList<>();
+ for (Set cluster : clusters) {
+ System.out.println("----gjkkklkjhgfds------");
+ System.out.println(cluster);
+ final Collection rowsForThisCluster = new ArrayList<>();
+ // cluster just has strings, convert each string in the cluster to its rows, and
+ // add it to rowsForThisCluster
+ cluster.forEach(s -> {
+ rowsForThisCluster.addAll(fieldValueToRows.get(s));
+ });
+ classes.add(rowsForThisCluster);
+ }
+
+ }
+}
+
diff --git a/src/in/edu/ashoka/surf/test/T2.java b/src/in/edu/ashoka/surf/test/T2.java
new file mode 100644
index 0000000..9bd2724
--- /dev/null
+++ b/src/in/edu/ashoka/surf/test/T2.java
@@ -0,0 +1,86 @@
+package in.edu.ashoka.surf.test;
+
+import in.edu.ashoka.surf.*;
+import edu.tsinghua.dbgroup.EditDistanceClusterer;
+import in.edu.ashoka.surf.Dataset;
+import in.edu.ashoka.surf.Row;
+import in.edu.ashoka.surf.util.Timers;
+import static java.util.stream.Collectors.toList;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.SetMultimap;
+
+class T2 {
+
+ private static final String path = "/Users/priyamgarrg21/Documents/Aditya/Internship@Ashoka/TCPD_GE_Delhi_2020-6-18.csv";
+
+ public static void main(String args[]) throws IOException {
+ Dataset dataset = Dataset.getDataset(path);
+ String fieldName = "Candidate";
+ int maxEditDistance = 5;
+ List> classes;
+
+// Set names = dataset.getRows().stream().map(r -> r.get("Candidate")).collect(Collectors.toSet());
+//
+// EditDistanceClusterer edcx = new EditDistanceClusterer(5);
+// names.forEach(edcx::populate);
+// List> clustersx = (List) edcx.getClusters();
+//
+// int ix = 0;
+// for (Set cluster : clustersx) {
+// System.out.println("Cluster " + ix++ + " -------");
+// for (String s : cluster)
+// System.out.println(s);
+// }
+// System.out.println("--------------------------------------------------------------------");
+
+ Collection filteredRows = dataset.getRows().stream().collect(toList());
+
+ SetMultimap fieldValueToRows = HashMultimap.create();
+ filteredRows.forEach(r -> fieldValueToRows.put(r.get(fieldName), r));
+
+ // do the clustering based on ed (but only if ed > 0)
+
+ filteredRows.forEach(r -> System.out.println(r.get(fieldName)));
+
+ List> clusters;
+
+ if (maxEditDistance >= 1) {
+ final EditDistanceClusterer edc = new EditDistanceClusterer(maxEditDistance);
+ filteredRows.forEach(r -> edc.populate(r.get(fieldName)));
+ clusters = (List) edc.getClusters();
+ } else {
+ // handle the case when edit distance is 0 by creating a list of single-element
+ // sets with all unique fieldVal's
+ clusters = new ArrayList<>();
+ for (String fieldVal : fieldValueToRows.keySet()) {
+ // create a set with a single val
+ Set set = new LinkedHashSet();
+ set.add(fieldVal);
+ clusters.add(set);
+ }
+ }
+
+ // compute the result of this algorithm
+ classes = new ArrayList<>();
+ for (Set cluster : clusters) {
+ System.out.println(cluster);
+ final Collection rowsForThisCluster = new ArrayList<>();
+ // cluster just has strings, convert each string in the cluster to its rows, and
+ // add it to rowsForThisCluster
+ cluster.forEach(s -> {
+ rowsForThisCluster.addAll(fieldValueToRows.get(s));
+ });
+ classes.add(rowsForThisCluster);
+ }
+
+ classes.forEach(x -> System.out.println(x.toString()));
+
+ }
+}
diff --git a/src/in/edu/ashoka/surf/test/T3.java b/src/in/edu/ashoka/surf/test/T3.java
new file mode 100644
index 0000000..0bce1c1
--- /dev/null
+++ b/src/in/edu/ashoka/surf/test/T3.java
@@ -0,0 +1,75 @@
+package in.edu.ashoka.surf.test;
+
+import static java.util.stream.Collectors.toList;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.SetMultimap;
+
+import in.edu.ashoka.surf.Dataset;
+import in.edu.ashoka.surf.Row;
+
+class T3 {
+
+ static final String path = "/Users/priyamgarrg21/Documents/Aditya/Internship@Ashoka/TCPD_GE_Delhi_2020-6-18.csv";
+
+ public static void main(String args[]) throws IOException {
+ Dataset dataset = Dataset.getDataset(path);
+ String fieldName = "Candidate";
+ int maxEditDistance = 5;
+ List> classes;
+
+ cos_sample mainfunc = new cos_sample();
+
+// System.out.println(mainfunc.cosine_similarity(mainfunc.word2vec("adity a"), mainfunc.word2vec("aditya xs")));
+
+ Collection filteredRows = dataset.getRows().stream().collect(toList());
+
+ SetMultimap fieldValueToRows = HashMultimap.create();
+ filteredRows.forEach(r -> fieldValueToRows.put(r.get(fieldName), r));
+
+// Iterator iterator = filteredRows.iterator();
+
+// while(iterator.hasNext()) {
+// System.out.println(iterator.next().get(fieldName));
+// }
+
+// filteredRows.forEach(r -> System.out.println(r.get(fieldName)));
+
+ System.out.println("--------------------------------------------------------------------");
+
+ List> clusters = mainfunc.assign_similarity(filteredRows, fieldName);
+
+// for (Row fil : filteredRows) {
+//// System.out.println(fil.get(fieldName));
+// Set setx = new LinkedHashSet();
+// setx.add(fil.get(fieldName));
+// clusters.add(setx);
+// }
+
+ int key = 0;
+ classes = new ArrayList<>();
+ for (Set cluster : clusters) {
+ System.out.println(key++ + " " + cluster);
+ final Collection rowsForThisCluster = new ArrayList<>();
+ cluster.forEach(s -> {
+ rowsForThisCluster.addAll(fieldValueToRows.get(s));
+ });
+ classes.add(rowsForThisCluster);
+ }
+
+ System.out.println("--------------------------------------------------------------------");
+
+ classes.forEach(x -> System.out.println(x.toString()));
+
+ }
+}
diff --git a/src/in/edu/ashoka/surf/test/Test1.java b/src/in/edu/ashoka/surf/test/Test1.java
index 10e0823..78bba66 100644
--- a/src/in/edu/ashoka/surf/test/Test1.java
+++ b/src/in/edu/ashoka/surf/test/Test1.java
@@ -13,12 +13,12 @@
*/
class Test1 {
- private static final String path = "/Users/hangal/Gujarat_worksheet.csv";
+ private static final String path = "/Users/priyamgarrg21/Documents/Aditya/Internship@Ashoka/TCPD_GE_Delhi_2020-6-18.csv";
public static void main(String args[]) throws IOException {
Dataset d = Dataset.getDataset(path);
- Set names = d.getRows().stream().map (r -> r.get("Name")).collect (Collectors.toSet());
+ Set names = d.getRows().stream().map (r -> r.get("Candidate")).collect (Collectors.toSet());
- EditDistanceClusterer edc = new EditDistanceClusterer(1);
+ EditDistanceClusterer edc = new EditDistanceClusterer(5);
names.forEach (edc::populate);
List> clusters = (List) edc.getClusters();
diff --git a/src/in/edu/ashoka/surf/test/cos_sample.java b/src/in/edu/ashoka/surf/test/cos_sample.java
new file mode 100644
index 0000000..cee32a5
--- /dev/null
+++ b/src/in/edu/ashoka/surf/test/cos_sample.java
@@ -0,0 +1,258 @@
+package in.edu.ashoka.surf.test;
+
+import static java.util.stream.Collectors.toList;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+
+import in.edu.ashoka.surf.Dataset;
+import in.edu.ashoka.surf.Row;
+
/**
 * Immutable pair of names together with their cosine-similarity score.
 * Fields are read directly by same-package callers (e.g. {@code nn.cosinesimilarity}
 * in cos_sample), so they stay as package-visible fields; they are never
 * reassigned after construction, hence {@code final}.
 */
class Node {
    final String name1;
    final String name2;
    final double cosinesimilarity; // score in [0, 1]
    final int index;               // position of name1 in the source name list

    public Node(String name1, String name2, double cosinesimilarity, int index) {
        this.name1 = name1;
        this.name2 = name2;
        this.cosinesimilarity = cosinesimilarity;
        this.index = index;
    }

    @Override
    public String toString() {
        return index + " " + name1 + " " + name2 + " " + cosinesimilarity;
    }
}
+
+class obj {
+
+ HashMap hash;
+ Set char_set;
+ double length;
+ String word;
+
+ public HashMap getHash() {
+ return hash;
+ }
+
+ public void setHash(HashMap hash) {
+ this.hash = hash;
+ }
+
+ public Set getChar_set() {
+ return char_set;
+ }
+
+ public void setChar_set(Set char_set) {
+ this.char_set = char_set;
+ }
+
+ public double getLength() {
+ return length;
+ }
+
+ public void setLength(int length) {
+ this.length = length;
+ }
+
+ public String getWord() {
+ return word;
+ }
+
+ public void setWord(String word) {
+ this.word = word;
+ }
+
+ public obj(HashMap hash, Set char_set, double length, String word) {
+ this.hash = hash;
+ this.char_set = char_set;
+ this.length = length;
+ this.word = word;
+ }
+
+}
+
+public class cos_sample {
+
+ static final String path = "/Users/priyamgarrg21/Documents/Aditya/Internship@Ashoka/TCPD_GE_Delhi_2020-6-18.csv";
+
+ public static HashMap Count(String inputString) {
+ HashMap charCountMap = new HashMap();
+
+ char[] strArray = inputString.toCharArray();
+
+ for (char c : strArray) {
+ if (charCountMap.containsKey(c)) {
+
+ charCountMap.put(c, charCountMap.get(c) + 1);
+ } else {
+
+ charCountMap.put(c, 1);
+ }
+ }
+
+ return charCountMap;
+
+ }
+
+ public static obj word2vec(String word) {
+ HashMap count_characters = Count(word);
+// System.out.println(count_characters);
+ Set set_characters = count_characters.keySet();
+// System.out.println(set_characters);
+
+ double length = 0;
+ int key = 0;
+
+ for (Integer in : count_characters.values()) {
+ key += (in * in);
+ }
+ length = Math.sqrt(key);
+// System.out.println(length);
+
+ return new obj(count_characters, set_characters, length, word);
+
+ }
+
+ public static double cosine_similarity(obj vector1, obj vector2) {
+ Set common_characters = new HashSet(vector1.getChar_set()); // use the copy constructor
+ common_characters.retainAll(vector2.getChar_set());
+// System.out.println("Intersection = " + common_characters);
+
+ int product_summation = 0;
+ for (Character ch : common_characters) {
+ product_summation += vector1.getHash().get(ch) * vector2.getHash().get(ch);
+ }
+// System.out.println("product_summation = " + product_summation);
+
+ double length = vector1.length * vector2.length;
+// System.out.println("length = " + length);
+
+ if (length == 0) {
+ return 0;
+ } else {
+ return product_summation / length;
+ }
+
+ }
+
+ public static List> assign_similarity(Collection filteredRows, String fieldName) {
+
+ HashMap> map = new HashMap<>();
+ ArrayList names = new ArrayList<>();
+ List> resultx = new ArrayList>();
+ filteredRows.forEach(r -> names.add(r.get(fieldName)));
+ filteredRows.forEach(r -> map.put(r.get(fieldName), new ArrayList<>()));
+ ArrayList similar = new ArrayList<>();
+ boolean visited[] = new boolean[names.size()];
+
+// System.out.println("Map = " + map);
+
+// for (int i = 0; i < names.size(); i++) {
+// System.out.println(i + " " + names.get(i));
+// }
+
+ for (int i = 0; i < names.size(); i++) {
+ String one = names.get(i);
+ int task = 0;
+ Set curr = null;
+ if (visited[i] == false) {
+ task = 1;
+ curr = new LinkedHashSet();
+ visited[i] = true;
+ curr.add(one);
+ }
+ for (int j = i + 1; j < names.size(); j++) {
+ String two = names.get(j);
+ Node nn = new Node(one, two, cosine_similarity(word2vec(one), word2vec(two)), i);
+ similar.add(nn);
+
+ if (task == 1) {
+// System.out.println("hello");
+ if (nn.cosinesimilarity >= 0.7 && visited[j] == false) {
+// System.out.println("adi");
+ curr.add(two);
+ visited[j] = true;
+ }
+ }
+ }
+ if (task == 1) {
+ resultx.add(curr);
+ }
+ }
+ int l = 0;
+
+ return resultx;
+
+// System.out.println("gggggggggggggggggggggg");
+// for (int i = 0; i < resultx.size(); i++) {
+// l += resultx.get(i).size();
+// System.out.println(resultx.get(i));
+// }
+
+// System.out.println(l);
+
+// for (String name : map.keySet()) {
+// List list = map.get(name);
+// list.add(name);
+// map.put(name, list);
+// }
+
+// for (int i = 0; i < similar.size(); i++) {
+// Node node = similar.get(i);
+// if (node.cosinesimilarity > 1.0) {
+// List set1 = map.get(node.name1);
+// set1.add(node.name2);
+// map.put(node.name1, set1);
+//
+// List set2 = map.get(node.name2);
+// set2.add(node.name1);
+// map.put(node.name2, set2);
+// }
+// }
+// System.out.println("Map = " + map);
+//
+// Collection> result = map.values();
+//
+// System.out.println("result = " + result);
+// System.out.println(result.size());
+//
+// int le = 0;
+// for(List aa : result) {
+// System.out.println(aa);
+// le += aa.size();
+// }
+// System.out.println(le);
+
+// System.out.println(similar);
+
+// for (int i = 0; i < similar.size(); i++) {
+// System.out.println(similar.get(i));
+// }
+
+ }
+
+ public static void main(String[] args) throws IOException {
+ // TODO Auto-generated method stub
+// String s1 = "adity a";
+// String s2 = "aditya x";
+//
+// System.out.println(cosine_similarity(word2vec(s1), word2vec(s2)));
+
+ Dataset dataset = Dataset.getDataset(path);
+ String fieldName = "Candidate";
+ Collection filteredRows = dataset.getRows().stream().collect(toList());
+
+ assign_similarity(filteredRows, fieldName);
+
+ }
+
+}
diff --git a/src/in/edu/ashoka/surf/util/Timers.java b/src/in/edu/ashoka/surf/util/Timers.java
index ae0ab92..7c3b143 100644
--- a/src/in/edu/ashoka/surf/util/Timers.java
+++ b/src/in/edu/ashoka/surf/util/Timers.java
@@ -14,6 +14,9 @@ public class Timers {
public static final StopWatch tokenizationTimer = new StopWatch();
public static final StopWatch editDistanceTimer = new StopWatch();
public static final StopWatch unionFindTimer = new StopWatch();
+ public static final StopWatch cosineTimer = new StopWatch();
+ public static final StopWatch CompatibleNameTimer = new StopWatch();
+ public static final StopWatch ReviewTimer = new StopWatch();
public static void print() {
log.info ("Canonicalization: " + canonTimer);
@@ -21,4 +24,6 @@ public static void print() {
log.info ("Edit distance computation: " + editDistanceTimer);
log.info ("Union Find: " + unionFindTimer);
}
+
+
}