Skip to content

Lower scores than expected for asymmetric dense evaluations #1

@meetdoshi90

Description

@meetdoshi90

Hi,
Thanks for sharing your implementation; it’s well organized.
I tried training a dense decoder HybridModel with both symmetric and asymmetric dense losses. During evaluation, the symmetric dense model performed as expected, but the asymmetric one performed far worse. Below are the MTEB scores on SciFact for symmetric vs. asymmetric dense evaluation of the same checkpoint (main score, nDCG@10: 0.78409 symmetric vs. 0.04163 asymmetric).

Asymmetric dense scores

Scores: {'default': {'ndcg_at_1': 0.01, 'ndcg_at_3': 0.02203, 'ndcg_at_5': 0.02879, 'ndcg_at_10': 0.04163, 'ndcg_at_20': 0.05296, 'ndcg_at_50': 0.08133, 'ndcg_at_100': 0.10282, 'ndcg_at_200': 0.12725, 'ndcg_at_500': 0.1493, 'ndcg_at_1000': 0.16184, 'map_at_1': 0.00833, 'map_at_3': 0.0175, 'map_at_5': 0.02133, 'map_at_10': 0.02614, 'map_at_20': 0.02941, 'map_at_50': 0.03403, 'map_at_100': 0.03595, 'map_at_200': 0.03723, 'map_at_500': 0.03786, 'map_at_1000': 0.03806, 'recall_at_1': 0.00833, 'recall_at_3': 0.0325, 'recall_at_5': 0.04917, 'recall_at_10': 0.09083, 'recall_at_20': 0.13417, 'recall_at_50': 0.27389, 'recall_at_100': 0.40178, 'recall_at_200': 0.57422, 'recall_at_500': 0.74983, 'recall_at_1000': 0.86422, 'precision_at_1': 0.01, 'precision_at_3': 0.01222, 'precision_at_5': 0.01067, 'precision_at_10': 0.00967, 'precision_at_20': 0.00717, 'precision_at_50': 0.006, 'precision_at_100': 0.0045, 'precision_at_200': 0.00317, 'precision_at_500': 0.00169, 'precision_at_1000': 0.00097, 'mrr_at_1': 0.01, 'mrr_at_3': 0.02, 'mrr_at_5': 0.023833333333333335, 'mrr_at_10': 0.02891666666666667, 'mrr_at_20': 0.03207321674426938, 'mrr_at_50': 0.036673329313709854, 'mrr_at_100': 0.03861110272785376, 'mrr_at_200': 0.03984023769125718, 'mrr_at_500': 0.040407059159300714, 'mrr_at_1000': 0.04058393347043627, 'nauc_ndcg_at_1_max': 0.8873088274751811, 'nauc_ndcg_at_1_std': 1.0, 'nauc_ndcg_at_1_diff1': 0.19023343171451565, 'nauc_ndcg_at_3_max': 0.24736998169987048, 'nauc_ndcg_at_3_std': 0.31552141780358806, 'nauc_ndcg_at_3_diff1': 0.20928856914909502, 'nauc_ndcg_at_5_max': 0.15287187045237985, 'nauc_ndcg_at_5_std': 0.2613451457732155, 'nauc_ndcg_at_5_diff1': 0.11082325598025994, 'nauc_ndcg_at_10_max': 0.03103520361508239, 'nauc_ndcg_at_10_std': 0.19623666317417462, 'nauc_ndcg_at_10_diff1': 0.09505187914285969, 'nauc_ndcg_at_20_max': -0.021678121045771933, 'nauc_ndcg_at_20_std': 0.18014107040362765, 'nauc_ndcg_at_20_diff1': 0.12248876871860763, 'nauc_ndcg_at_50_max': 
0.03388460121351858, 'nauc_ndcg_at_50_std': 0.17810830080044893, 'nauc_ndcg_at_50_diff1': 0.06811624328858898, 'nauc_ndcg_at_100_max': 0.059711006055000765, 'nauc_ndcg_at_100_std': 0.1790609964306146, 'nauc_ndcg_at_100_diff1': 0.0758548186745338, 'nauc_ndcg_at_200_max': 0.039965124635792375, 'nauc_ndcg_at_200_std': 0.17287168656420276, 'nauc_ndcg_at_200_diff1': 0.07610878998223512, 'nauc_ndcg_at_500_max': 0.0682054349493362, 'nauc_ndcg_at_500_std': 0.16195893442778894, 'nauc_ndcg_at_500_diff1': 0.08726149471966554, 'nauc_ndcg_at_1000_max': 0.09210514072192345, 'nauc_ndcg_at_1000_std': 0.16352913857867266, 'nauc_ndcg_at_1000_diff1': 0.10645922734326128, 'nauc_map_at_1_max': 0.9323852964851088, 'nauc_map_at_1_std': 1.0, 'nauc_map_at_1_diff1': 0.3112959484840356, 'nauc_map_at_3_max': 0.33082908505500397, 'nauc_map_at_3_std': 0.4022214556156062, 'nauc_map_at_3_diff1': 0.22467962231847383, 'nauc_map_at_5_max': 0.24931873826133621, 'nauc_map_at_5_std': 0.34385550822824423, 'nauc_map_at_5_diff1': 0.14957363496109463, 'nauc_map_at_10_max': 0.15790678756023116, 'nauc_map_at_10_std': 0.28887237638695756, 'nauc_map_at_10_diff1': 0.1324800374401656, 'nauc_map_at_20_max': 0.11952298962749441, 'nauc_map_at_20_std': 0.27802101330040757, 'nauc_map_at_20_diff1': 0.13948881925115691, 'nauc_map_at_50_max': 0.12620019252399264, 'nauc_map_at_50_std': 0.2694732223309835, 'nauc_map_at_50_diff1': 0.11800783460335836, 'nauc_map_at_100_max': 0.13067406358728514, 'nauc_map_at_100_std': 0.26790694489111305, 'nauc_map_at_100_diff1': 0.11875033571443026, 'nauc_map_at_200_max': 0.12842702900198533, 'nauc_map_at_200_std': 0.26713032343269, 'nauc_map_at_200_diff1': 0.11896950670690037, 'nauc_map_at_500_max': 0.13113825880068627, 'nauc_map_at_500_std': 0.2669380174955775, 'nauc_map_at_500_diff1': 0.12026413994361251, 'nauc_map_at_1000_max': 0.13178277364160865, 'nauc_map_at_1000_std': 0.26720097163315026, 'nauc_map_at_1000_diff1': 0.12087882182780195, 'nauc_recall_at_1_max': 0.9323852964851088, 
'nauc_recall_at_1_std': 1.0, 'nauc_recall_at_1_diff1': 0.3112959484840356, 'nauc_recall_at_3_max': 0.07684051928752753, 'nauc_recall_at_3_std': 0.1876810248154492, 'nauc_recall_at_3_diff1': 0.1776433922932447, 'nauc_recall_at_5_max': -0.0020691881960770934, 'nauc_recall_at_5_std': 0.14674409826597484, 'nauc_recall_at_5_diff1': 0.05590446262500406, 'nauc_recall_at_10_max': -0.10846463815991915, 'nauc_recall_at_10_std': 0.09684351484210182, 'nauc_recall_at_10_diff1': 0.056426818431332945, 'nauc_recall_at_20_max': -0.15129279140461027, 'nauc_recall_at_20_std': 0.09118742785660496, 'nauc_recall_at_20_diff1': 0.1126339610530519, 'nauc_recall_at_50_max': -0.011660189219342806, 'nauc_recall_at_50_std': 0.1240388693881615, 'nauc_recall_at_50_diff1': 0.030971005550193692, 'nauc_recall_at_100_max': 0.03995566084134627, 'nauc_recall_at_100_std': 0.13898928609185673, 'nauc_recall_at_100_diff1': 0.047234533867142124, 'nauc_recall_at_200_max': -0.00538693115698711, 'nauc_recall_at_200_std': 0.122867159104308, 'nauc_recall_at_200_diff1': 0.043217682898696125, 'nauc_recall_at_500_max': 0.06892622982352326, 'nauc_recall_at_500_std': 0.07012588473387449, 'nauc_recall_at_500_diff1': 0.05338580574167755, 'nauc_recall_at_1000_max': 0.23381040483286153, 'nauc_recall_at_1000_std': 0.02662457801883639, 'nauc_recall_at_1000_diff1': 0.15134187240144242, 'nauc_precision_at_1_max': 0.8873088274751811, 'nauc_precision_at_1_std': 1.0, 'nauc_precision_at_1_diff1': 0.19023343171451565, 'nauc_precision_at_3_max': 0.1433275605532111, 'nauc_precision_at_3_std': 0.17262238700392696, 'nauc_precision_at_3_diff1': 0.2067712271629631, 'nauc_precision_at_5_max': 0.049805473571236776, 'nauc_precision_at_5_std': 0.13958948215723094, 'nauc_precision_at_5_diff1': 0.08544070297826657, 'nauc_precision_at_10_max': -0.08200179491686962, 'nauc_precision_at_10_std': 0.11061869119102904, 'nauc_precision_at_10_diff1': 0.07731095546940762, 'nauc_precision_at_20_max': -0.14090436196526135, 'nauc_precision_at_20_std': 
0.10949352698410512, 'nauc_precision_at_20_diff1': 0.1371356129556776, 'nauc_precision_at_50_max': 0.00986659847419339, 'nauc_precision_at_50_std': 0.12160422793334173, 'nauc_precision_at_50_diff1': 0.050411285854323924, 'nauc_precision_at_100_max': 0.07085633561962114, 'nauc_precision_at_100_std': 0.10302793732225513, 'nauc_precision_at_100_diff1': 0.06947712782765472, 'nauc_precision_at_200_max': 0.016496623747989127, 'nauc_precision_at_200_std': 0.07361705764882281, 'nauc_precision_at_200_diff1': 0.06959197469947863, 'nauc_precision_at_500_max': 0.0672620614128177, 'nauc_precision_at_500_std': -0.026215479697435207, 'nauc_precision_at_500_diff1': 0.11457387111288471, 'nauc_precision_at_1000_max': 0.11126656660985439, 'nauc_precision_at_1000_std': -0.09199969474192927, 'nauc_precision_at_1000_diff1': 0.16882806339192627, 'nauc_mrr_at_1_max': 0.8873088274751811, 'nauc_mrr_at_1_std': 1.0, 'nauc_mrr_at_1_diff1': 0.19023343171451565, 'nauc_mrr_at_3_max': 0.37221625972632133, 'nauc_mrr_at_3_std': 0.4113302328354647, 'nauc_mrr_at_3_diff1': 0.20367140685090773, 'nauc_mrr_at_5_max': 0.29259927086597315, 'nauc_mrr_at_5_std': 0.35762153953728915, 'nauc_mrr_at_5_diff1': 0.13982261366216273, 'nauc_mrr_at_10_max': 0.19516589356120015, 'nauc_mrr_at_10_std': 0.31180322457620524, 'nauc_mrr_at_10_diff1': 0.12775936191327736, 'nauc_mrr_at_20_max': 0.15997275098264113, 'nauc_mrr_at_20_std': 0.2934291724615113, 'nauc_mrr_at_20_diff1': 0.13523063960696954, 'nauc_mrr_at_50_max': 0.16051702576771093, 'nauc_mrr_at_50_std': 0.28042982882220957, 'nauc_mrr_at_50_diff1': 0.11133084376632707, 'nauc_mrr_at_100_max': 0.16310397130728652, 'nauc_mrr_at_100_std': 0.2788672865338168, 'nauc_mrr_at_100_diff1': 0.11351854436215868, 'nauc_mrr_at_200_max': 0.1616736826663659, 'nauc_mrr_at_200_std': 0.2787364759464312, 'nauc_mrr_at_200_diff1': 0.11360630876365696, 'nauc_mrr_at_500_max': 0.16413674646230858, 'nauc_mrr_at_500_std': 0.27961541025021586, 'nauc_mrr_at_500_diff1': 0.11438899545963606, 
'nauc_mrr_at_1000_max': 0.16493075754133144, 'nauc_mrr_at_1000_std': 0.2800562839036051, 'nauc_mrr_at_1000_diff1': 0.11481979158188106, 'main_score': 0.04163}}

Symmetric dense scores

Scores: {'default': {'ndcg_at_1': 0.65, 'ndcg_at_3': 0.73397, 'ndcg_at_5': 0.76058, 'ndcg_at_10': 0.78409, 'ndcg_at_20': 0.79167, 'ndcg_at_50': 0.79514, 'ndcg_at_100': 0.79728, 'ndcg_at_200': 0.79773, 'ndcg_at_500': 0.7993, 'ndcg_at_1000': 0.7993, 'map_at_1': 0.62078, 'map_at_3': 0.70449, 'map_at_5': 0.72305, 'map_at_10': 0.73523, 'map_at_20': 0.73768, 'map_at_50': 0.73831, 'map_at_100': 0.73849, 'map_at_200': 0.73851, 'map_at_500': 0.73855, 'map_at_1000': 0.73855, 'recall_at_1': 0.62078, 'recall_at_3': 0.79111, 'recall_at_5': 0.85939, 'recall_at_10': 0.925, 'recall_at_20': 0.95333, 'recall_at_50': 0.97, 'recall_at_100': 0.98333, 'recall_at_200': 0.98667, 'recall_at_500': 1.0, 'recall_at_1000': 1.0, 'precision_at_1': 0.65, 'precision_at_3': 0.28889, 'precision_at_5': 0.19133, 'precision_at_10': 0.105, 'precision_at_20': 0.05417, 'precision_at_50': 0.022, 'precision_at_100': 0.01113, 'precision_at_200': 0.00558, 'precision_at_500': 0.00226, 'precision_at_1000': 0.00113, 'mrr_at_1': 0.65, 'mrr_at_3': 0.7194444444444444, 'mrr_at_5': 0.7344444444444445, 'mrr_at_10': 0.7425357142857142, 'mrr_at_20': 0.7442217718928245, 'mrr_at_50': 0.7448467169960306, 'mrr_at_100': 0.7450286431063257, 'mrr_at_200': 0.7450489683095777, 'mrr_at_500': 0.7450858794747878, 'mrr_at_1000': 0.7450858794747878, 'nauc_ndcg_at_1_max': 0.3627525971736242, 'nauc_ndcg_at_1_std': -0.1306914042368519, 'nauc_ndcg_at_1_diff1': 0.7682426017847933, 'nauc_ndcg_at_3_max': 0.41604303177499014, 'nauc_ndcg_at_3_std': -0.12287485659912772, 'nauc_ndcg_at_3_diff1': 0.7434924188443847, 'nauc_ndcg_at_5_max': 0.3880591955637355, 'nauc_ndcg_at_5_std': -0.16369168349981023, 'nauc_ndcg_at_5_diff1': 0.735645120438687, 'nauc_ndcg_at_10_max': 0.41699374790651755, 'nauc_ndcg_at_10_std': -0.10536618290902276, 'nauc_ndcg_at_10_diff1': 0.7384410189986392, 'nauc_ndcg_at_20_max': 0.41455154930205196, 'nauc_ndcg_at_20_std': -0.10290403803977852, 'nauc_ndcg_at_20_diff1': 0.7360737101581665, 'nauc_ndcg_at_50_max': 
0.4120531510520165, 'nauc_ndcg_at_50_std': -0.10127004841324208, 'nauc_ndcg_at_50_diff1': 0.7397544054530358, 'nauc_ndcg_at_100_max': 0.40887482235964867, 'nauc_ndcg_at_100_std': -0.0981094325123131, 'nauc_ndcg_at_100_diff1': 0.7403728968639031, 'nauc_ndcg_at_200_max': 0.4073285450169886, 'nauc_ndcg_at_200_std': -0.10063995233218721, 'nauc_ndcg_at_200_diff1': 0.7413729154388403, 'nauc_ndcg_at_500_max': 0.4081779185186141, 'nauc_ndcg_at_500_std': -0.10840691032678539, 'nauc_ndcg_at_500_diff1': 0.7416329627134625, 'nauc_ndcg_at_1000_max': 0.4081779185186141, 'nauc_ndcg_at_1000_std': -0.10840691032678539, 'nauc_ndcg_at_1000_diff1': 0.7416329627134625, 'nauc_map_at_1_max': 0.3267561655812645, 'nauc_map_at_1_std': -0.19311475359327654, 'nauc_map_at_1_diff1': 0.7812577934696515, 'nauc_map_at_3_max': 0.3869748434043196, 'nauc_map_at_3_std': -0.14532899488085232, 'nauc_map_at_3_diff1': 0.7521213867138041, 'nauc_map_at_5_max': 0.38421363557717375, 'nauc_map_at_5_std': -0.15253182015325595, 'nauc_map_at_5_diff1': 0.7454117159524001, 'nauc_map_at_10_max': 0.4024843480065793, 'nauc_map_at_10_std': -0.12267399127272965, 'nauc_map_at_10_diff1': 0.7451152251758866, 'nauc_map_at_20_max': 0.4024211594652643, 'nauc_map_at_20_std': -0.12081484049097588, 'nauc_map_at_20_diff1': 0.7448575323831138, 'nauc_map_at_50_max': 0.4018209340153318, 'nauc_map_at_50_std': -0.12093099394735983, 'nauc_map_at_50_diff1': 0.745312210319542, 'nauc_map_at_100_max': 0.40159052274572904, 'nauc_map_at_100_std': -0.12080334414760642, 'nauc_map_at_100_diff1': 0.745373548595114, 'nauc_map_at_200_max': 0.4015354972716926, 'nauc_map_at_200_std': -0.12089438525734916, 'nauc_map_at_200_diff1': 0.7454091616677377, 'nauc_map_at_500_max': 0.40155033874875046, 'nauc_map_at_500_std': -0.12103989985215688, 'nauc_map_at_500_diff1': 0.7454135879097666, 'nauc_map_at_1000_max': 0.40155033874875046, 'nauc_map_at_1000_std': -0.12103989985215688, 'nauc_map_at_1000_diff1': 0.7454135879097666, 'nauc_recall_at_1_max': 
0.3267561655812645, 'nauc_recall_at_1_std': -0.19311475359327654, 'nauc_recall_at_1_diff1': 0.7812577934696515, 'nauc_recall_at_3_max': 0.4362950424256932, 'nauc_recall_at_3_std': -0.13798333089082496, 'nauc_recall_at_3_diff1': 0.7176126791714116, 'nauc_recall_at_5_max': 0.36682249887956486, 'nauc_recall_at_5_std': -0.27364636260088243, 'nauc_recall_at_5_diff1': 0.6744421113488513, 'nauc_recall_at_10_max': 0.5172009544558565, 'nauc_recall_at_10_std': 0.013352007469655426, 'nauc_recall_at_10_diff1': 0.680630770826851, 'nauc_recall_at_20_max': 0.5325463518740801, 'nauc_recall_at_20_std': 0.08266639989328771, 'nauc_recall_at_20_diff1': 0.6182472989195651, 'nauc_recall_at_50_max': 0.546944703807449, 'nauc_recall_at_50_std': 0.25920738665837, 'nauc_recall_at_50_diff1': 0.6651623612407954, 'nauc_recall_at_100_max': 0.45200746965452526, 'nauc_recall_at_100_std': 0.7722689075630278, 'nauc_recall_at_100_diff1': 0.6409897292250234, 'nauc_recall_at_200_max': 0.31500933706815215, 'nauc_recall_at_200_std': 0.748015873015867, 'nauc_recall_at_200_diff1': 0.7117180205415485, 'nauc_recall_at_500_max': nan, 'nauc_recall_at_500_std': nan, 'nauc_recall_at_500_diff1': nan, 'nauc_recall_at_1000_max': nan, 'nauc_recall_at_1000_std': nan, 'nauc_recall_at_1000_diff1': nan, 'nauc_precision_at_1_max': 0.3627525971736242, 'nauc_precision_at_1_std': -0.1306914042368519, 'nauc_precision_at_1_diff1': 0.7682426017847933, 'nauc_precision_at_3_max': 0.40964236273857924, 'nauc_precision_at_3_std': 0.1468863430745248, 'nauc_precision_at_3_diff1': 0.32016764587261903, 'nauc_precision_at_5_max': 0.3341998827837127, 'nauc_precision_at_5_std': 0.19044165008036848, 'nauc_precision_at_5_diff1': 0.10944055335371985, 'nauc_precision_at_10_max': 0.33079361889770786, 'nauc_precision_at_10_std': 0.4294685019592082, 'nauc_precision_at_10_diff1': -0.0972347812496519, 'nauc_precision_at_20_max': 0.28501605443107986, 'nauc_precision_at_20_std': 0.45756554371196756, 'nauc_precision_at_20_diff1': -0.1865051528165122, 
'nauc_precision_at_50_max': 0.2705990973753426, 'nauc_precision_at_50_std': 0.5026270872435937, 'nauc_precision_at_50_diff1': -0.23336431170314173, 'nauc_precision_at_100_max': 0.24891229823853167, 'nauc_precision_at_100_std': 0.5512004767305508, 'nauc_precision_at_100_diff1': -0.28435311380660355, 'nauc_precision_at_200_max': 0.23845998921524722, 'nauc_precision_at_200_std': 0.5467739944516985, 'nauc_precision_at_200_diff1': -0.29329331280332005, 'nauc_precision_at_500_max': 0.23394769973788, 'nauc_precision_at_500_std': 0.5349115601329176, 'nauc_precision_at_500_diff1': -0.3525348634015125, 'nauc_precision_at_1000_max': 0.23394769973788, 'nauc_precision_at_1000_std': 0.5349115601329176, 'nauc_precision_at_1000_diff1': -0.3525348634015125, 'nauc_mrr_at_1_max': 0.3627525971736242, 'nauc_mrr_at_1_std': -0.1306914042368519, 'nauc_mrr_at_1_diff1': 0.7682426017847933, 'nauc_mrr_at_3_max': 0.401528717354392, 'nauc_mrr_at_3_std': -0.12084305851278879, 'nauc_mrr_at_3_diff1': 0.7499048594453275, 'nauc_mrr_at_5_max': 0.38858601551804894, 'nauc_mrr_at_5_std': -0.13567646679987813, 'nauc_mrr_at_5_diff1': 0.7463570655909431, 'nauc_mrr_at_10_max': 0.3949986377666977, 'nauc_mrr_at_10_std': -0.12173478049284686, 'nauc_mrr_at_10_diff1': 0.7482445427606959, 'nauc_mrr_at_20_max': 0.39398845465680105, 'nauc_mrr_at_20_std': -0.12152070952642012, 'nauc_mrr_at_20_diff1': 0.7471959714626155, 'nauc_mrr_at_50_max': 0.39335430988932013, 'nauc_mrr_at_50_std': -0.12164076891342937, 'nauc_mrr_at_50_diff1': 0.7476647200570752, 'nauc_mrr_at_100_max': 0.3931130859560323, 'nauc_mrr_at_100_std': -0.12151164513902976, 'nauc_mrr_at_100_diff1': 0.7477290192473905, 'nauc_mrr_at_200_max': 0.3930563769915757, 'nauc_mrr_at_200_std': -0.12160422717728198, 'nauc_mrr_at_200_diff1': 0.7477654293045622, 'nauc_mrr_at_500_max': 0.39307001982025963, 'nauc_mrr_at_500_std': -0.12175221978874587, 'nauc_mrr_at_500_diff1': 0.7477703271705433, 'nauc_mrr_at_1000_max': 0.39307001982025963, 'nauc_mrr_at_1000_std': 
-0.12175221978874587, 'nauc_mrr_at_1000_diff1': 0.7477703271705433, 'main_score': 0.78409}}

I am using bidirectional attention with EOS pooling and no sparse training.
To me, it doesn't seem like a problem in the way embedding bags are computed or used.
Can you please check?

Thanks in advance.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions