diff --git a/config/dataset/beir/climate-fever.yaml b/config/dataset/beir/climate-fever.yaml new file mode 100644 index 0000000..74ea66d --- /dev/null +++ b/config/dataset/beir/climate-fever.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.dataset_processor.IRDSDocProcessor + irds_name: "beir/climate-fever" + split: "full" + + query: + init_args: + _target_: modules.dataset_processor.IRDSQueryProcessor + irds_name: "beir/climate-fever" + split: "full" +test: + doc: null + query: null \ No newline at end of file diff --git a/config/dataset/beir/dbpedia.yaml b/config/dataset/beir/dbpedia.yaml new file mode 100644 index 0000000..c06778b --- /dev/null +++ b/config/dataset/beir/dbpedia.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.dataset_processor.IRDSDocProcessor + irds_name: "beir/dbpedia-entity/test" + split: "full" + + query: + init_args: + _target_: modules.dataset_processor.IRDSQueryProcessor + irds_name: "beir/dbpedia-entity/test" + split: "full" +test: + doc: null + query: null \ No newline at end of file diff --git a/config/dataset/beir/fever.yaml b/config/dataset/beir/fever.yaml new file mode 100644 index 0000000..cf3254c --- /dev/null +++ b/config/dataset/beir/fever.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.dataset_processor.IRDSDocProcessor + irds_name: "beir/fever/test" + split: "full" + + query: + init_args: + _target_: modules.dataset_processor.IRDSQueryProcessor + irds_name: "beir/fever/test" + split: "full" +test: + doc: null + query: null \ No newline at end of file diff --git a/config/dataset/beir/fiqa.yaml b/config/dataset/beir/fiqa.yaml new file mode 100644 index 0000000..b0473bf --- /dev/null +++ b/config/dataset/beir/fiqa.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.dataset_processor.IRDSDocProcessor + irds_name: 
"beir/fiqa/test" + split: "full" + + query: + init_args: + _target_: modules.dataset_processor.IRDSQueryProcessor + irds_name: "beir/fiqa/test" + split: "full" +test: + doc: null + query: null \ No newline at end of file diff --git a/config/dataset/beir/hotpotqa.yaml b/config/dataset/beir/hotpotqa.yaml new file mode 100644 index 0000000..cd0880d --- /dev/null +++ b/config/dataset/beir/hotpotqa.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.dataset_processor.IRDSDocProcessor + irds_name: "beir/hotpotqa/test" + split: "full" + + query: + init_args: + _target_: modules.dataset_processor.IRDSQueryProcessor + irds_name: "beir/hotpotqa/test" + split: "full" +test: + doc: null + query: null \ No newline at end of file diff --git a/config/dataset/beir/lotte_forum.yaml b/config/dataset/beir/lotte_forum.yaml new file mode 100644 index 0000000..17c5cd3 --- /dev/null +++ b/config/dataset/beir/lotte_forum.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.dataset_processor.IRDSDocProcessor + irds_name: "lotte/pooled/test/forum" + split: "full" + + query: + init_args: + _target_: modules.dataset_processor.IRDSQueryProcessor + irds_name: "lotte/pooled/test/forum" + split: "full" +test: + doc: null + query: null diff --git a/config/dataset/beir/lotte_search.yaml b/config/dataset/beir/lotte_search.yaml new file mode 100644 index 0000000..389400c --- /dev/null +++ b/config/dataset/beir/lotte_search.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.dataset_processor.IRDSDocProcessor + irds_name: "lotte/pooled/test/search" + split: "full" + + query: + init_args: + _target_: modules.dataset_processor.IRDSQueryProcessor + irds_name: "lotte/pooled/test/search" + split: "full" +test: + doc: null + query: null diff --git a/config/dataset/beir/nfcorpus.yaml b/config/dataset/beir/nfcorpus.yaml new file mode 100644 index 
0000000..61e3fd0 --- /dev/null +++ b/config/dataset/beir/nfcorpus.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.dataset_processor.IRDSDocProcessor + irds_name: "beir/nfcorpus/test" + split: "full" + + query: + init_args: + _target_: modules.dataset_processor.IRDSQueryProcessor + irds_name: "beir/nfcorpus/test" + split: "full" +test: + doc: null + query: null \ No newline at end of file diff --git a/config/dataset/beir/nq.yaml b/config/dataset/beir/nq.yaml new file mode 100644 index 0000000..15fad26 --- /dev/null +++ b/config/dataset/beir/nq.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.dataset_processor.IRDSDocProcessor + irds_name: "beir/nq" + split: "full" + + query: + init_args: + _target_: modules.dataset_processor.IRDSQueryProcessor + irds_name: "beir/nq" + split: "full" +test: + doc: null + query: null \ No newline at end of file diff --git a/config/dataset/beir/quora.yaml b/config/dataset/beir/quora.yaml new file mode 100644 index 0000000..2c2d273 --- /dev/null +++ b/config/dataset/beir/quora.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.dataset_processor.IRDSDocProcessor + irds_name: "beir/quora/test" + split: "full" + query: + init_args: + _target_: modules.dataset_processor.IRDSQueryProcessor + irds_name: "beir/quora/test" + split: "full" + +test: + doc: null + query: null diff --git a/config/dataset/beir/scidocs.yaml b/config/dataset/beir/scidocs.yaml new file mode 100644 index 0000000..2b07173 --- /dev/null +++ b/config/dataset/beir/scidocs.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.dataset_processor.IRDSDocProcessor + irds_name: "beir/scidocs" + split: "full" + + query: + init_args: + _target_: modules.dataset_processor.IRDSQueryProcessor + irds_name: "beir/scidocs" + split: "full" +test: + doc: null + query: null \ No 
newline at end of file diff --git a/config/dataset/beir/scifact.yaml b/config/dataset/beir/scifact.yaml new file mode 100644 index 0000000..9aa23e6 --- /dev/null +++ b/config/dataset/beir/scifact.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.dataset_processor.IRDSDocProcessor + irds_name: "beir/scifact/test" + split: "full" + + query: + init_args: + _target_: modules.dataset_processor.IRDSQueryProcessor + irds_name: "beir/scifact/test" + split: "full" +test: + doc: null + query: null \ No newline at end of file diff --git a/config/dataset/beir/trec-covid.yaml b/config/dataset/beir/trec-covid.yaml new file mode 100644 index 0000000..60e7030 --- /dev/null +++ b/config/dataset/beir/trec-covid.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.dataset_processor.IRDSDocProcessor + irds_name: "beir/trec-covid" + split: "full" + + query: + init_args: + _target_: modules.dataset_processor.IRDSQueryProcessor + irds_name: "beir/trec-covid" + split: "full" +test: + doc: null + query: null \ No newline at end of file diff --git a/config/dataset/beir/webis-touche.yaml b/config/dataset/beir/webis-touche.yaml new file mode 100644 index 0000000..7d8783c --- /dev/null +++ b/config/dataset/beir/webis-touche.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.dataset_processor.IRDSDocProcessor + irds_name: "beir/webis-touche2020/v2" + split: "full" + + query: + init_args: + _target_: modules.dataset_processor.IRDSQueryProcessor + irds_name: "beir/webis-touche2020/v2" + split: "full" +test: + doc: null + query: null \ No newline at end of file diff --git a/config/dataset/bright/aops.yaml b/config/dataset/bright/aops.yaml new file mode 100644 index 0000000..aa18b25 --- /dev/null +++ b/config/dataset/bright/aops.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: 
modules.processors.bright_dataset_processor.BRIGHTDocProcessor + split: "aops" + longdoc: False + + query: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTQueryProcessor + split: "aops" + longdoc: False +test: + doc: null + query: null diff --git a/config/dataset/bright/biology.yaml b/config/dataset/bright/biology.yaml new file mode 100644 index 0000000..49a1909 --- /dev/null +++ b/config/dataset/bright/biology.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTDocProcessor + split: "biology" + longdoc: False + + query: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTQueryProcessor + split: "biology" + longdoc: False +test: + doc: null + query: null diff --git a/config/dataset/bright/earth_science.yaml b/config/dataset/bright/earth_science.yaml new file mode 100644 index 0000000..3990005 --- /dev/null +++ b/config/dataset/bright/earth_science.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTDocProcessor + split: "earth_science" + longdoc: False + + query: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTQueryProcessor + split: "earth_science" + longdoc: False +test: + doc: null + query: null diff --git a/config/dataset/bright/economics.yaml b/config/dataset/bright/economics.yaml new file mode 100644 index 0000000..536d34a --- /dev/null +++ b/config/dataset/bright/economics.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTDocProcessor + split: "economics" + longdoc: False + + query: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTQueryProcessor + split: "economics" + longdoc: False +test: + doc: null + query: null diff --git a/config/dataset/bright/leetcode.yaml 
b/config/dataset/bright/leetcode.yaml new file mode 100644 index 0000000..308fde3 --- /dev/null +++ b/config/dataset/bright/leetcode.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTDocProcessor + split: "leetcode" + longdoc: False + + query: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTQueryProcessor + split: "leetcode" + longdoc: False +test: + doc: null + query: null diff --git a/config/dataset/bright/pony.yaml b/config/dataset/bright/pony.yaml new file mode 100644 index 0000000..c57261e --- /dev/null +++ b/config/dataset/bright/pony.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTDocProcessor + split: "pony" + longdoc: False + + query: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTQueryProcessor + split: "pony" + longdoc: False +test: + doc: null + query: null diff --git a/config/dataset/bright/psychology.yaml b/config/dataset/bright/psychology.yaml new file mode 100644 index 0000000..93b281d --- /dev/null +++ b/config/dataset/bright/psychology.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTDocProcessor + split: "psychology" + longdoc: False + + query: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTQueryProcessor + split: "psychology" + longdoc: False +test: + doc: null + query: null diff --git a/config/dataset/bright/robotics.yaml b/config/dataset/bright/robotics.yaml new file mode 100644 index 0000000..32a192d --- /dev/null +++ b/config/dataset/bright/robotics.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTDocProcessor + split: "robotics" + longdoc: False + + query: + init_args: + 
_target_: modules.processors.bright_dataset_processor.BRIGHTQueryProcessor + split: "robotics" + longdoc: False +test: + doc: null + query: null diff --git a/config/dataset/bright/stackoverflow.yaml b/config/dataset/bright/stackoverflow.yaml new file mode 100644 index 0000000..3a0f5da --- /dev/null +++ b/config/dataset/bright/stackoverflow.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTDocProcessor + split: "stackoverflow" + longdoc: False + + query: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTQueryProcessor + split: "stackoverflow" + longdoc: False +test: + doc: null + query: null diff --git a/config/dataset/bright/sustainable_living.yaml b/config/dataset/bright/sustainable_living.yaml new file mode 100644 index 0000000..81524a8 --- /dev/null +++ b/config/dataset/bright/sustainable_living.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTDocProcessor + split: "sustainable_living" + longdoc: False + + query: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTQueryProcessor + split: "sustainable_living" + longdoc: False +test: + doc: null + query: null diff --git a/config/dataset/bright/theoremqa_questions.yaml b/config/dataset/bright/theoremqa_questions.yaml new file mode 100644 index 0000000..2c9c5f5 --- /dev/null +++ b/config/dataset/bright/theoremqa_questions.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTDocProcessor + split: "theoremqa_questions" + longdoc: False + + query: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTQueryProcessor + split: "theoremqa_questions" + longdoc: False +test: + doc: null + query: null diff --git a/config/dataset/bright/theoremqa_theorems.yaml 
b/config/dataset/bright/theoremqa_theorems.yaml new file mode 100644 index 0000000..a13b479 --- /dev/null +++ b/config/dataset/bright/theoremqa_theorems.yaml @@ -0,0 +1,19 @@ +train: + doc: null + query: null + +dev: + doc: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTDocProcessor + split: "theoremqa_theorems" + longdoc: False + + query: + init_args: + _target_: modules.processors.bright_dataset_processor.BRIGHTQueryProcessor + split: "theoremqa_theorems" + longdoc: False +test: + doc: null + query: null diff --git a/config/dataset/msmarco-train.yaml b/config/dataset/msmarco-train.yaml new file mode 100644 index 0000000..4c6c3a5 --- /dev/null +++ b/config/dataset/msmarco-train.yaml @@ -0,0 +1,16 @@ +train: + doc: null + query: null +dev: + doc: + init_args: + _target_: modules.dataset_processor.MsMarcoCollection + split: "full" + + query: + init_args: + _target_: modules.dataset_processor.MsMarcoTrainQueries + split: "full" +test: + doc: null + query: null diff --git a/config/reranker/bge.yaml b/config/reranker/bge.yaml index 11216c1..c405a9c 100644 --- a/config/reranker/bge.yaml +++ b/config/reranker/bge.yaml @@ -1,5 +1,5 @@ init_args: _target_: models.rerankers.crossencoder.CrossEncoder - model_name: "BAAI/bge-large-en" + model_name: "BAAI/bge-reranker-large" max_len: 256 batch_size: 256 diff --git a/config/reranker/mixbread.yaml b/config/reranker/mixbread.yaml new file mode 100644 index 0000000..eb51a46 --- /dev/null +++ b/config/reranker/mixbread.yaml @@ -0,0 +1,5 @@ +init_args: + _target_: models.rerankers.crossencoder.CrossEncoder + model_name: "mixedbread-ai/mxbai-rerank-large-v1" + max_len: 256 +batch_size: 64 diff --git a/config/reranker/rankllama.yaml b/config/reranker/rankllama.yaml new file mode 100644 index 0000000..df79e30 --- /dev/null +++ b/config/reranker/rankllama.yaml @@ -0,0 +1,5 @@ +init_args: + _target_: models.rerankers.crossencoder.CrossEncoder + model_name: "castorini/rankllama-v1-7b-lora-passage" + max_len: 
256 +batch_size: 32 diff --git a/models/rerankers/crossencoder.py b/models/rerankers/crossencoder.py index bb5476c..1c1be33 100644 --- a/models/rerankers/crossencoder.py +++ b/models/rerankers/crossencoder.py @@ -14,8 +14,14 @@ class CrossEncoder(Reranker): def __init__(self, model_name=None,max_len=512): self.model_name = model_name self.max_len= max_len - self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name, low_cpu_mem_usage=True, torch_dtype=torch.float16) - self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, max_length=self.max_len) + self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name, num_labels=1, torch_dtype=torch.float16) + if model_name== 'castorini/rankllama-v1-7b-lora-passage': + self.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", padding_side="right") + self.tokenizer.pad_token = self.tokenizer.eos_token + self.tokenizer.pad_token_id = self.tokenizer.eos_token_id + self.model.config.pad_token_id = self.tokenizer.pad_token_id + else: + self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, max_length=self.max_len) self.model.eval() if torch.cuda.device_count() > 1 and torch.cuda.is_available(): self.model = torch.nn.DataParallel(self.model) diff --git a/models/retrievers/repllama.py b/models/retrievers/repllama.py index 0bb308c..78fac23 100644 --- a/models/retrievers/repllama.py +++ b/models/retrievers/repllama.py @@ -55,7 +55,7 @@ def collate_fn(self, batch, query_or_doc): return_dict = self.tokenizer(content, padding=True, truncation=True, max_length=self.max_len,return_tensors='pt') return return_dict - def __call__(self, kwargs): + def __call__(self, query_or_doc, kwargs): kwargs = {key: value.to(self.device) for key, value in kwargs.items()} # get accumulated eos token counts per exmaple accumulated_eos_tokens = (kwargs['input_ids'] != self.tokenizer.pad_token_id).cumsum(dim=1) diff --git a/modules/dataset_processor.py b/modules/dataset_processor.py 
index 69923c7..c18931a 100644 --- a/modules/dataset_processor.py +++ b/modules/dataset_processor.py @@ -483,15 +483,74 @@ def __init__(self, *args, **kwargs): def process(self): # load from the ir-dataset HF repo hf_name = "irds/msmarco-passage" - dataset = datasets.load_dataset(hf_name, 'docs', num_proc=self.num_proc) # no need for split? + dataset = datasets.load_dataset(hf_name, 'docs', num_proc=self.num_proc,trust_remote_code=True) # no need for split? dataset = dataset.rename_column("doc_id", "id") dataset = dataset.rename_column("text", "content") return dataset +class MsMarcoTrainQueries(Processor): + + def __init__(self, *args, **kwargs): + dataset_name = 'ms-marco-train-queries' + super().__init__(*args, **kwargs, dataset_name=dataset_name) + + def process(self): + import ir_datasets + ird = ir_datasets.load("msmarco-passage/train/judged") + Qid= [q.query_id for q in ird.queries_iter()] + Qtext= [q.text for q in ird.queries_iter()] + hf_dataset= datasets.Dataset.from_dict({'id':Qid, 'content':Qtext}) + return hf_dataset + # applies processing to dataset names # processes query and doc with different processors +class IRDSDocProcessor(Processor): + def __init__(self, irds_name,*args, **kwargs): + dataset_name = irds_name.replace('/','_')+'_doc' + super().__init__(*args, **kwargs, dataset_name=dataset_name) + self.irds_name=irds_name + + def process(self): + import ir_datasets + dataset = ir_datasets.load(self.irds_name) + print(dataset) + def dataset_generator(): + for doc in dataset.docs_iter(): + doc # namedtuple + doc_text='' + if hasattr(doc,'title'): + doc_text+=doc.title+ ' ' +doc.text + else: + doc_text+=doc.text + yield {'id':doc.doc_id, 'content':doc_text} + + hf_dataset= datasets.Dataset.from_generator(dataset_generator) + return hf_dataset + + +class IRDSQueryProcessor(Processor): + def __init__(self, irds_name,*args, **kwargs): + dataset_name = irds_name.replace('/','_')+'_query' + self.irds_name=irds_name + super().__init__(*args, **kwargs, 
dataset_name=dataset_name) + + def process(self): + import ir_datasets + dataset = ir_datasets.load(self.irds_name) + print(dataset) + def dataset_generator(): + for doc in dataset.queries_iter(): + ## namedtuple + yield {'id':doc.query_id, 'content':doc.text} + + hf_dataset= datasets.Dataset.from_generator(dataset_generator) + return hf_dataset + + + + class UT1Queries(Processor): def __init__(self, *args, **kwargs): dataset_name = 'ut1queries' diff --git a/modules/processors/bright_dataset_processor.py b/modules/processors/bright_dataset_processor.py new file mode 100644 index 0000000..b20ff5b --- /dev/null +++ b/modules/processors/bright_dataset_processor.py @@ -0,0 +1,47 @@ +from ..dataset_processor import * +import datasets +import requests + + + + + +class BRIGHTDocProcessor(Processor): + def __init__(self, longdoc, split,*args, **kwargs): + dataset_name = 'BRIGHT_%s'% split + super().__init__(*args, **kwargs, split=split,dataset_name=dataset_name) + self.longdoc = longdoc + + def process(self): + hf_name = 'xlangai/BRIGHT' + doc = 'long_documents' if self.longdoc else 'documents' + dataset = datasets.load_dataset(hf_name, doc,num_proc=self.num_proc)[self.split] + return dataset + + + + +class BRIGHTQueryProcessor(Processor): + def __init__(self, longdoc,split,qlen=-1,*args, **kwargs): + dataset_name = 'BRIGHTQuery_%s' %split + super().__init__(*args, **kwargs, split=split,dataset_name=dataset_name) + self.longdoc = longdoc + self.qlen = qlen + + def process(self): + hf_name = 'xlangai/BRIGHT' + dataset = datasets.load_dataset(hf_name, "examples",num_proc=self.num_proc)[self.split] + dataset = dataset.rename_column("query", "content") + if self.qlen != -1: + dataset = dataset.map(lambda x:{'content':" ".join(x['content'].split()[:self.qlen])}) + if self.longdoc: + dataset = dataset.rename_column("gold_ids_long", "ranking_label") + else: + dataset = dataset.rename_column("gold_ids", "ranking_label") + + dataset = dataset.remove_columns(['reasoning', 
'excluded_ids', 'gold_ids' if self.longdoc else 'gold_ids_long']) + + return dataset + + +