cyber2a · minump · Oct 25, 2024 · Nov 21, 2024 · May 2, 2025 · May 2, 2025
diff --git a/images/foundation-models/ar-nar.png b/images/foundation-models/ar-nar.png
diff --git a/images/foundation-models/diffusion-training-2.png b/images/foundation-models/diffusion-training-2.png
diff --git a/references.bib b/references.bib
@@ -1289,6 +1289,55 @@ @article{amundson_leconte_2019
     file = {Snapshot:/Users/sandeep/Zotero/storage/BLQW3X6X/doi10.html:text/html},
 }
 
+@inproceedings{li-etal-2022-elmer,
+    author    = {Li, Junyi and
+                 Tang, Tianyi and
+                 Zhao, Wayne Xin and
+                 Nie, Jian-Yun and
+                 Wen, Ji-Rong},
+    editor    = {Goldberg, Yoav and
+                 Kozareva, Zornitsa and
+                 Zhang, Yue},
+    title     = {{ELMER}: A Non-Autoregressive Pre-trained Language Model for Efficient and Effective Text Generation},
+    booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
+    month     = dec,
+    year      = {2022},
+    address   = {Abu Dhabi, United Arab Emirates},
+    publisher = {Association for Computational Linguistics},
+    pages     = {1044--1058},
+    doi       = {10.18653/v1/2022.emnlp-main.68},
+    url       = {https://aclanthology.org/2022.emnlp-main.68/},
+    abstract  = {We study the text generation task under the approach of pre-trained language models (PLMs). Typically, an auto-regressive (AR) method is adopted for generating texts in a token-by-token manner. Despite many advantages of AR generation, it usually suffers from inefficient inference. Therefore, non-autoregressive (NAR) models are proposed to generate all target tokens simultaneously. However, NAR models usually generate texts of lower quality due to the absence of token dependency in the output text. In this paper, we propose ELMER: an efficient and effective PLM for NAR text generation to explicitly model the token dependency during NAR generation. By leveraging the early exit technique, ELMER enables the token generations at different layers, according to their prediction confidence (a more confident token will exit at a lower layer). Besides, we propose a novel pre-training objective, Layer Permutation Language Modeling, to pre-train ELMER by permuting the exit layer for each token in sequences. Experiments on three text generation tasks show that ELMER significantly outperforms NAR models and further narrows the performance gap with AR PLMs (ELMER (29.92) vs BART (30.61) ROUGE-L in XSUM) while achieving over 10 times inference speedup.}
+}
+
+@article{CHEN2024116651,
+    author   = {Chen, Qiaochuan and Cai, Candong and Chen, Yaoran and Zhou, Xi and Zhang, Dan and Peng, Yan},
+    title    = {{TemproNet}: A transformer-based deep learning model for seawater temperature prediction},
+    journal  = {Ocean Engineering},
+    volume   = {293},
+    pages    = {116651},
+    year     = {2024},
+    issn     = {0029-8018},
+    doi      = {10.1016/j.oceaneng.2023.116651},
+    url      = {https://www.sciencedirect.com/science/article/pii/S0029801823030354},
+    keywords = {Transformer, Satellite observation, Deep learning, Seawater temperature},
+    abstract = {Accurate prediction of seawater temperature is crucial for meteorological model understanding and climate change assessment. This study proposes TemproNet, a deep learning model based on a transformer and convolutional neural network, to accurately predict subsurface seawater temperature using satellite observations in the South China Sea. TemproNet uses multivariate sea surface observations such as sea level anomaly (SLA), sea surface temperature (SST), and sea surface wind (SSW) as model inputs, which employs a hierarchical transformer encoder to extract the multi-scale feature, uses a lightweight convolutional decoder to predict seawater temperature. We train and validate the model using the CMEMS temperature dataset and compare its accuracy with Attention-Unet, LightGBM, and ANN. Experimental results show that TemproNet has significantly outperformed other models with RMSE and R2 of 0.52 °C and 0.83 in a 32-layer temperature profile prediction task over 200 m in the South China Sea. In addition, we fully demonstrate the error of our model in space, in time, and at different depths, showing the efficiency and stability of our model. The input sensitivity analysis showed that SST contributed more to predicting shallow water temperature, while SLA significantly impacted the prediction of mid-deep water temperature. The results of this study provide an innovative and reliable solution for seawater temperature prediction and have important implications for meteorological model understanding and climate change assessment.}
+}
+
+@article{KIM2023106920,
+    author   = {Kim, Mingyu and Lee, Jaekyeong and Choi, Leechan and Choi, Minjoo},
+    title    = {{PolarGAN}: Creating realistic {Arctic} sea ice concentration images with user-defined geometric preferences},
+    journal  = {Engineering Applications of Artificial Intelligence},
+    volume   = {126},
+    pages    = {106920},
+    year     = {2023},
+    issn     = {0952-1976},
+    doi      = {10.1016/j.engappai.2023.106920},
+    url      = {https://www.sciencedirect.com/science/article/pii/S0952197623011041},
+    keywords = {Generative adversarial networks, Sea ice concentration, Artificial sea ice images, Geometric preferences, Data augmentation},
+    abstract = {In this paper, we introduce a novel generative adversarial network (GAN), called PolarGAN, that is capable of creating realistic artificial images of Arctic sea ice concentration (SIC) for data augmentation. One of the key features of the PolarGAN is that it considers real-valued geometric preferences, defined by six statistics, to generate SIC images that align with specific geometric characteristics. Unlike other GANs that also consider user-defined preferences, the PolarGAN allows for more detailed control over the shape and size of the generated images by using differentiable projection functions to convert the created images into geometric features, and a newly-designed loss function to minimize the gap between the user-defined preferences and the geometric features of the generated images. Through extensive experimentation, we compare the PolarGAN with other GANs and demonstrate artificial SIC scenarios that can be used to test the performance of algorithms for Arctic route planning in edge cases or to improve data-driven models such as SIC prediction models which require additional data to avoid overfitting issues.}
+}
+
 @book{jackson2025ai,
     title     = {Artificial Intelligence},
     author    = {Jackson, Tom},
@@ -1297,4 +1346,4 @@ @book{jackson2025ai
     publisher = {New Burlington},
     isbn      = {9781802421446},
     pages     = {176}
-}
+}