diff --git a/src/main/java/uk/ac/susx/tag/classificationframework/clusters/clusteranalysis/ClusterFeatureAnalysis.java b/src/main/java/uk/ac/susx/tag/classificationframework/clusters/clusteranalysis/ClusterFeatureAnalysis.java
index 237b1f9..b7a339c 100644
--- a/src/main/java/uk/ac/susx/tag/classificationframework/clusters/clusteranalysis/ClusterFeatureAnalysis.java
+++ b/src/main/java/uk/ac/susx/tag/classificationframework/clusters/clusteranalysis/ClusterFeatureAnalysis.java
@@ -545,7 +545,7 @@ public static FeatureBasedCounts loadBackgroundCounter(File inputFile) throws IO
     public static List<Instance> readCsv(String inpath) {
         List<Instance> list = new ArrayList<Instance>();
         try {
-            Reader reader = Files.newBufferedReader(Paths.get(inpath) , StandardCharsets.UTF_16);
+            Reader reader = Files.newBufferedReader(Paths.get(inpath) , StandardCharsets.UTF_8);
             CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT
 //                    change based on the the number of columns in the file, my file had only text column which contains the articles
                     .withHeader("text")
@@ -553,7 +553,7 @@ public static List<Instance> readCsv(String inpath) {
                     .withTrim());
 
             for (CSVRecord csvRecord: csvParser) {
-                String text = csvRecord.get("text").replaceAll("[^\u0000-\u200f]", "");
+                String text = csvRecord.get("text");
 //                This was added by Qiwiei to check if there is some errors in the file i guess for missing values i kept it but change the size to the number of columns your file have in my case only one column
                 if (csvRecord.size()!=1) {
                     System.out.println("!!!!!!! corrupted instance !!!!!");
@@ -619,17 +619,6 @@ public static void print_instance(Iterable<Instance> corpus) {
 
     public static void main(String[] args) throws IOException, ClassNotFoundException {
 
-//        // Deserialised it to see if it contains the lang key
-//        ObjectInputStream ios = new ObjectInputStream(new FileInputStream("/Users/ay227/Desktop/CASM/Arabic_background/ar-wiki-pipeline.ser"));
-//        FeatureExtractionPipeline temp;
-//        try {
-//            while ((temp = (FeatureExtractionPipeline) ios.readObject()) != null) {
-//                System.out.println(temp);
-//            }
-//        } catch (EOFException e) {
-//        } finally {
-//            ios.close();
-//        }
 
         List<String> topFeatures = Lists.newArrayList(
                 "visualisation",
@@ -645,56 +634,49 @@ public static void main(String[] args) throws IOException, ClassNotFoundExceptio
         );
 
 //        build the pipeline
-        FeatureExtractionPipeline arabic_pipeline = new PipelineBuilder().build(new PipelineBuilder.OptionList() // Instantiate the pipeline.
+        FeatureExtractionPipeline french_pipeline = new PipelineBuilder().build(new PipelineBuilder.OptionList() // Instantiate the pipeline.
                 .add("tokeniser", ImmutableMap.of(
-                        "type", "arabicstanford",
+                        "type", "basic",
                         "filter_punctuation", true,
-                        "normalise_urls", true
+                        "normalise_urls", true,
+                        "lower_case", true
                         )
                 )
                 .add("remove_stopwords", ImmutableMap.of(
                         "use", "true",
-                        "lang", "ar"))
+                        "lang", "fr"))
                 .add("filter_regex", "[\\-（()）【\\[\\]】]")
                 .add("unigrams", true)
         );
 
-//      Read the csv file that contains the article and save it in articles.ser
-        List<Instance> bl = savecorpus(new File("/Users/ay227/Desktop/CASM/Arabic_background/ar-wiki-articles.ser"), "/Users/ay227/Desktop/CASM/source_background/ar_background_10.csv");
-//        to test the correctness of serializing articles load the generated file
-        Iterable<Instance> bl_test = load(new File("/Users/ay227/Desktop/CASM/Arabic_background/ar-wiki-articles.ser"));
-//        load one of the files previously generated by other language
-        Iterable<Instance> bl_en_test = load(new File("/Users/ay227/Desktop/CASM/git/wikisample-withzh/sample/zh-wiki-articles.ser"));
+        //      Read the csv file that contains the article and save it in articles.ser
+        List<Instance> bl = savecorpus(new File("/Users/ay227/Desktop/CASM/M52/generate_background/French_background/fr-wiki-articles.ser"), "/Users/ay227/Desktop/CASM/M52/generate_background/French_background/fr_backgroud.csv");
+        //        to test the correctness of the serialization, reload the generated file with load(new File(...))
 
         System.out.println("Done loading articles");
-//        to print the count of the serialized articles for example for the output should be
-//        done loading articles
-//        36205 the size of the generated Arabic file (approximately)
-//        15000 the size of english
-        System.out.println(count_size(bl_test));
-        System.out.println(count_size(bl_en_test));
-//        to print the serialized articles
-        print_instance(bl_test);
-
-        savepipeline(new File("/Users/ay227/Desktop/CASM/Arabic_background/ar-wiki-pipeline.ser"), arabic_pipeline);
+
+        //        to print the serialized articles
+        print_instance(bl);
+        //        print the number of serialized articles
+        System.out.println(count_size(bl));
+
+        savepipeline(new File("/Users/ay227/Desktop/CASM/M52/generate_background/French_background/fr-wiki-pipeline.ser"), french_pipeline);
         System.out.println("Done saving pipeline.ser");
 
 
 
-        FeatureBasedCounts counter1 = saveNewBackgroundCounter(new File("/Users/ay227/Desktop/CASM/Arabic_background/ar-wiki-count.ser"), 1, bl, arabic_pipeline, 3);
-        System.out.println("Done testing FeatureBasedCounts");
+//        FeatureBasedCounts counter1 = saveNewBackgroundCounter(new File("/Users/ay227/Desktop/CASM/M52/French_background/fr-wiki-count.ser"), 1, bl, french_pipeline, 3);
+//        System.out.println("Done testing FeatureBasedCounts");
 
         IncrementalFeatureCounter cNew = new IncrementalFeatureCounter(0.1);
-        cNew.incrementCounts(bl, arabic_pipeline, 10);
+        cNew.incrementCounts(bl, french_pipeline, 10);
         cNew.pruneFeaturesWithCountLessThanN(3);
         System.out.println("Done testing IncrementalFeatureCounter");
 
-        try (ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream("/Users/ay227/Desktop/CASM/Arabic_background/ar-wiki-inc-feat-counts.ser"))){
+        try (ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream("/Users/ay227/Desktop/CASM/M52/generate_background/French_background/fr-wiki-inc-feat-counts.ser"))){
             out.writeObject(cNew);
         }
-        System.out.println("Done saving ar-wiki-inc-feat-counts.ser");
-
-
+        System.out.println("Done saving wiki-inc-feat-counts.ser");
 
     }
 
diff --git a/src/main/java/uk/ac/susx/tag/classificationframework/featureextraction/filtering/TokenFilterRelevanceStopwords.java b/src/main/java/uk/ac/susx/tag/classificationframework/featureextraction/filtering/TokenFilterRelevanceStopwords.java
index f72655c..56afc5f 100644
--- a/src/main/java/uk/ac/susx/tag/classificationframework/featureextraction/filtering/TokenFilterRelevanceStopwords.java
+++ b/src/main/java/uk/ac/susx/tag/classificationframework/featureextraction/filtering/TokenFilterRelevanceStopwords.java
@@ -188,6 +188,22 @@ public class TokenFilterRelevanceStopwords extends TokenFilter {
                 "unsen","unser","unsere","unserer","unses","unter","v","vergangenen","viel","viele","vielem","vielen","vielleicht","vier","vierte","vierten","vierter","viertes","vom","von","vor","w","wahr","wann","war","waren","warst","wart","warum","was","weg","wegen","weil","weit","weiter","weitere","weiteren","weiteres","welche","welchem","welchen",
                 "welcher","welches","wem","wen","wenig","wenige","weniger","weniges","wenigstens","wenn","wer","werde","werden","werdet","weshalb","wessen","wie","wieder","wieso","will","willst","wir","wird","wirklich","wirst","wissen","wo","woher","wohin","wohl","wollen","wollt","wollte","wollten","worden","wurde","wurden","während","währenddem","währenddessen","wäre","würde",
                 "würden","x","y","z","z.b","zehn","zehnte","zehnten","zehnter","zehntes","zeit","zu","zuerst","zugleich","zum","zunächst","zur","zurück","zusammen","zwanzig","zwar","zwei","zweite","zweiten","zweiter","zweites","zwischen","zwölf","über","überhaupt","übrigens"));
+        // French ("fr") stop words, all lower-case; some words are listed both with and without diacritics (e.g. "apres"/"après").
+        put("fr", Sets.newHashSet("a", "abord", "absolument", "afin", "ah", "ai", "aie", "aient", "aies", "ailleurs", "ainsi", "ait", "allaient", "allo", "allons", "allô", "alors", "anterieur", "anterieure", "anterieures", "apres", "après", "as", "assez", "attendu", "au", "aucun", "aucune", "aucuns", "aujourd", "aujourd'hui", "aupres", "auquel", "aura", "aurai", "auraient", "aurais", "aurait", "auras", "aurez", "auriez",
+                "aurions", "aurons", "auront", "aussi", "autant", "autre", "autrefois", "autrement", "autres", "autrui", "aux", "auxquelles", "auxquels", "avaient", "avais", "avait", "avant", "avec", "avez", "aviez", "avions", "avoir", "avons", "ayant", "ayez", "ayons", "b", "bah", "bas", "basee", "bat", "beau", "beaucoup", "bien", "bigre", "bon", "boum", "bravo", "brrr", "c", "car", "ce", "ceci", "cela", "celle", "celle-ci", "celle-là",
+                "celles", "celles-ci", "celles-là", "celui", "celui-ci", "celui-là", "celà", "cent", "cependant", "certain", "certaine", "certaines", "certains", "certes", "ces", "cet", "cette", "ceux", "ceux-ci", "ceux-là", "chacun", "chacune", "chaque", "cher", "chers", "chez", "chiche", "chut", "chère", "chères", "ci", "cinq", "cinquantaine", "cinquante", "cinquantième", "cinquième", "clac", "clic", "combien", "comme", "comment", "comparable",
+                "comparables", "compris", "concernant", "contre", "couic", "crac", "d", "da", "dans", "de", "debout", "dedans", "dehors", "deja", "delà", "depuis", "dernier", "derniere", "derriere", "derrière", "des", "desormais", "desquelles", "desquels", "dessous", "dessus", "deux", "deuxième", "deuxièmement", "devant", "devers", "devra", "devrait", "different", "differentes", "différent", "différente", "différentes", "différents", "dire", "directe",
+                "directement", "dit", "dite", "dits", "divers", "diverse", "diverses", "dix", "dix-huit", "dix-neuf", "dix-sept", "dixième", "doit", "doivent", "donc", "dont", "dos", "douze", "douzième", "dring", "droite", "du", "duquel", "durant", "dès", "début", "désormais", "e", "effet", "egale", "egalement", "egales", "eh", "elle", "elle-même", "elles", "elles-mêmes", "en", "encore", "enfin", "entre", "envers", "environ", "es", "essai", "est", "et",
+                "etant", "etc", "etre", "eu", "eue", "eues", "euh", "eurent", "eus", "eusse", "eussent", "eusses", "eussiez", "eussions", "eut", "eux", "eux-mêmes", "exactement", "excepté", "extenso", "exterieur", "eûmes", "eût", "eûtes", "f", "fais", "faisaient", "faisant", "fait", "faites", "façon", "feront", "fi", "flac", "floc", "fois", "font", "force", "furent", "fus", "fusse", "fussent", "fusses", "fussiez", "fussions", "fut", "fûmes", "fût", "fûtes",
+                "g", "gens", "h", "ha", "haut", "hein", "hem", "hep", "hi", "ho", "holà", "hop", "hormis", "hors", "hou", "houp", "hue", "hui", "huit", "huitième", "hum", "hurrah", "hé", "hélas", "i", "ici", "il", "ils", "importe", "j", "je", "jusqu", "jusque", "juste", "k", "l", "la", "laisser", "laquelle", "las", "le", "lequel", "les", "lesquelles", "lesquels", "leur", "leurs", "longtemps", "lors", "lorsque", "lui", "lui-meme", "lui-même", "là", "lès", "m",
+                "ma", "maint", "maintenant", "mais", "malgre", "malgré", "maximale", "me", "meme", "memes", "merci", "mes", "mien", "mienne", "miennes", "miens", "mille", "mine", "minimale", "moi", "moi-meme", "moi-même", "moindres", "moins", "mon", "mot", "moyennant", "multiple", "multiples", "même", "mêmes", "n", "na", "naturel", "naturelle", "naturelles", "ne", "neanmoins", "necessaire", "necessairement", "neuf", "neuvième", "ni", "nombreuses", "nombreux", "nommés",
+                "non", "nos", "notamment", "notre", "nous", "nous-mêmes", "nouveau", "nouveaux", "nul", "néanmoins", "nôtre", "nôtres", "o", "oh", "ohé", "ollé", "olé", "on", "ont", "onze", "onzième", "ore", "ou", "ouf", "ouias", "oust", "ouste", "outre", "ouvert", "ouverte", "ouverts", "où", "p", "paf", "pan", "par", "parce", "parfois", "parle", "parlent", "parler", "parmi", "parole", "parseme", "partant", "particulier", "particulière", "particulièrement", "pas",
+                "passé", "pendant", "pense", "permet", "personne", "personnes", "peu", "peut", "peuvent", "peux", "pff", "pfft", "pfut", "pif", "pire", "pièce", "plein", "plouf", "plupart", "plus", "plusieurs", "plutôt", "possessif", "possessifs", "possible", "possibles", "pouah", "pour", "pourquoi", "pourrais", "pourrait", "pouvait", "prealable", "precisement", "premier", "première", "premièrement", "pres", "probable", "probante", "procedant", "proche", "près",
+                "psitt", "pu", "puis", "puisque", "pur", "pure", "q", "qu", "quand", "quant", "quant-à-soi", "quanta", "quarante", "quatorze", "quatre", "quatre-vingt", "quatrième", "quatrièmement", "que", "quel", "quelconque", "quelle", "quelles", "quelquun", "quelque", "quelques", "quels", "qui", "quiconque", "quinze", "quoi", "quoique", "r", "rare", "rarement", "rares", "relative", "relativement", "remarquable", "rend", "rendre", "restant", "reste", "restent",
+                "restrictif", "retour", "revoici", "revoilà", "rien", "s", "sa", "sacrebleu", "sait", "sans", "sapristi", "sauf", "se", "sein", "seize", "selon", "semblable", "semblaient", "semble", "semblent", "sent", "sept", "septième", "sera", "serai", "seraient", "serais", "serait", "seras", "serez", "seriez", "serions", "serons", "seront", "ses", "seul", "seule", "seulement", "si", "sien", "sienne", "siennes", "siens", "sinon", "six", "sixième", "soi", "soi-même",
+                "soient", "sois", "soit", "soixante", "sommes", "son", "sont", "sous", "souvent", "soyez", "soyons", "specifique", "specifiques", "speculatif", "stop", "strictement", "subtiles", "suffisant", "suffisante", "suffit", "suis", "suit", "suivant", "suivante", "suivantes", "suivants", "suivre", "sujet", "superpose", "sur", "surtout", "t", "ta", "tac", "tandis", "tant", "tardive", "te", "tel", "telle", "tellement", "telles", "tels", "tenant", "tend", "tenir",
+                "tente", "tes", "tic", "tien", "tienne", "tiennes", "tiens", "toc", "toi", "toi-même", "ton", "touchant", "toujours", "tous", "tout", "toute", "toutefois", "toutes", "treize", "trente", "tres", "trois", "troisième", "troisièmement", "trop", "très", "tsoin", "tsouin", "tu", "té", "u", "un", "une", "unes", "uniformement", "unique", "uniques", "uns", "v", "va", "vais", "valeur", "vas", "vers", "via", "vif", "vifs", "vingt", "vivat", "vive", "vives", "vlan",
+                "voici", "voie", "voient", "voilà", "voire", "vont", "vos", "votre", "vous", "vous-mêmes", "vu", "vé", "vôtre", "vôtres", "w", "x", "y", "z", "zut", "à", "â", "ça", "ès", "étaient", "étais", "était", "étant", "état", "étiez", "étions", "été", "étée", "étées", "étés", "êtes", "être", "ô"));
     }};
 
     public static boolean supportedLanguages(String lang) {