22 changes: 21 additions & 1 deletion courses/CS243/meta.json
@@ -2,6 +2,22 @@
"code": "CS243",
"name": "Introduction to Algorithmic Game Theory",
"semesters": [
{
"season": "Spring",
"year": "2025",
"course_id": "CS243.01",
"department": "信息科学与技术学院",
"credit": "4",
"hours": "64",
"prerequisite": "无",
"evaluation": "Classroom Quizzes (10%); Homework (25%); Project (25%); Exam (40%)",
"teacher": [
"zhaodj"
],
"ta": [
"unknown"
]
},
{
"course_id": "CS243.01",
"season": "Spring",
@@ -27,7 +43,11 @@
"zhaodj"
],
"ta": [
"zhangty", "yangty2", "zhangyao1", "lianxy", "gexu"
"zhangty",
"yangty2",
"zhangyao1",
"lianxy",
"gexu"
],
"department": "信息科学与技术学院",
"credit": "4",
154 changes: 154 additions & 0 deletions courses/CS245/CS245.01_Spring_2025/Homework 作业/CS245_2025_hw1.tex
@@ -0,0 +1,154 @@
\documentclass[11pt,oneside,a4paper]{article}
\usepackage[top=1 in, bottom=1 in, left=0.85 in, right=0.85 in]{geometry}
\usepackage{hyperref}
\usepackage{float}
\usepackage{subcaption}

\usepackage{amsmath,amssymb,graphicx,url, algorithm2e}
\usepackage{thmtools,thm-restate,wrapfig,enumitem,mathabx}
\newtheorem{assumption}{Assumption}
\newtheorem{theorem}{Theorem}
\newtheorem{remark}{Remark}
\newtheorem{lemma}{Lemma}
\newtheorem{corollary}{Corollary}
\newtheorem{proof}{Proof}
\usepackage{color}

\newcommand{\red}[1]{\textcolor{red}{#1}}
\newcommand{\blue}[1]{\textcolor{blue}{#1}}
\newcommand{\ceil}[1]{\lceil #1 \rceil}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}


\title{Online Optimization and Learning (CS245)}
\author{Name: ~~~~~~~~~~~~ID: ~~~~~~~~~~~~Email: ~~~~~~~~~~~~}
\date{}

\begin{document}

\maketitle
\noindent
\rule{\linewidth}{0.4pt}
{\bf {\large Rules:}}
\begin{enumerate}
\item Deadline: \textcolor{red}{\textbf{2025-03-29/23:59:59}}.\\ The grade of a late submission is subject to the decaying policy $(75\%, 50\%, 25\%)$.
\item Please typeset your homework in \LaTeX; handwritten submissions are not accepted.
\item Submit your homework to the TA (guohq@shanghaitech.edu.cn), including your PDF and code, with the filename ``{\sf name+id+CS245HW1.zip}''.
\item \textcolor{red}{Plagiarism is not allowed.} You will fail this homework if any plagiarism is detected.
\end{enumerate}
\vspace{-0.3cm}
\rule{\linewidth}{0.4pt}

\newpage

\noindent {\bf Problem 1: Adaptive Online Learning.}
\vspace{0.5cm}

\vspace{0.1in}
\hrule
\vspace{0.05in}
\noindent{\bf Online Mirror Descent}
\vspace{0.05in}
\hrule
\vspace{0.05in}

\noindent {\bf Initialization:} $x_1\in \mathcal K,$ learning rate $\eta_t$ and regularizer $\psi(\cdot).$

\noindent For $t=1,\cdots, T:$
\begin{itemize}
\item \noindent {\bf Learner:} Submit $x_t.$
\item {\bf Environment:} Observe the loss gradient $l_t$.
\item {\bf Update:} $x_{t+1} = \argmin_{x\in \mathcal K} ~\langle l_t,x \rangle + \frac{1}{\eta_t} B_\psi(x;x_{t}).$

\end{itemize}
\vspace{0.02in}
\hrule
\vspace{0.1in}

\vspace{0.1in}
\hrule
\vspace{0.05in}
\noindent{\bf Follow-The-Regularized-Leader}
\vspace{0.05in}
\hrule
\vspace{0.05in}

\noindent {\bf Initialization:} $x_1\in \mathcal K,$ learning rate $\eta_t$ and regularizer $R(\cdot).$

\noindent For $t=1,\cdots, T:$
\begin{itemize}
\item \noindent {\bf Learner:} Submit $x_t.$
\item {\bf Environment:} Observe the loss function $f_t(x) = \langle l_t,x \rangle$.
\item {\bf Update:} $x_{t+1} = \argmin_{x\in \mathcal K} ~ \sum_{s=1}^t f_s(x) + \frac{1}{\eta_t} R(x).$

\end{itemize}
\vspace{0.02in}
\hrule
\vspace{0.1in}
Here we introduce the Online Mirror Descent (OMD) and Follow-The-Regularized-Leader (FTRL) algorithms for linear loss functions $f_t(x) = \langle l_t, x\rangle \geq 0$. Let's consider the following problems (please provide detailed steps to justify your answers):
\begin{itemize}
\item Let the regularizer be $\psi(x) = R(x) = \frac{1}{2} \|x\|^2$. Please design an adaptive learning rate $\eta_t$ to prove that OMD and FTRL achieve sub-linear regret, where the regret is defined as usual: $$\mathcal R(T)=\sum_{t=1}^T f_t(x_t) - \min_{x\in \mathcal K} \sum_{t=1}^T f_t(x).$$
\item FTRL and OMD both have regularization terms; can you explain the connections between these two algorithms?
\end{itemize}
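
\vspace{0.2cm}
\noindent For intuition, here is a minimal NumPy sketch of both updates under the illustrative assumption that $\mathcal K$ is a Euclidean ball of radius $D$, so the projection has a closed form: with the quadratic regularizer, the OMD step is a projected gradient step and the FTRL step projects the scaled negative cumulative loss. The adaptive step size shown is only one common choice, not necessarily the one you should derive.
\begin{verbatim}
import numpy as np

def project_ball(x, radius=1.0):
    # Euclidean projection onto {x : ||x|| <= radius}.
    norm = np.linalg.norm(x)
    return x if norm <= radius else x * (radius / norm)

def omd_quadratic(losses, radius=1.0):
    # OMD with psi(x) = ||x||^2 / 2: each step is a projected gradient step.
    x = np.zeros(losses.shape[1])              # x_1 in K
    grad_norm_sq, iterates = 0.0, []
    for l in losses:
        iterates.append(x.copy())
        grad_norm_sq += np.dot(l, l)
        eta = radius / (np.sqrt(grad_norm_sq) + 1e-12)  # one common adaptive choice
        x = project_ball(x - eta * l, radius)
    return np.array(iterates)

def ftrl_quadratic(losses, radius=1.0):
    # FTRL with R(x) = ||x||^2 / 2: project the scaled negative cumulative loss.
    L = np.zeros(losses.shape[1])
    grad_norm_sq = 0.0
    iterates = [np.zeros(losses.shape[1])]     # x_1 = argmin_K R(x)
    for l in losses[:-1]:
        L += l
        grad_norm_sq += np.dot(l, l)
        eta = radius / (np.sqrt(grad_norm_sq) + 1e-12)
        iterates.append(project_ball(-eta * L, radius))
    return np.array(iterates)
\end{verbatim}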



\newpage

\noindent {\bf Problem 2: Online Mirror Descent for LLM Preference Alignment.}
\vspace{0.5cm}

\noindent
Reinforcement learning from human feedback (RLHF) has proven effective at aligning large language models (LLMs) with human preferences. Given a prompt $x_t$ sampled from the dataset according to a specific distribution, the LLM samples a pair of responses $(y^1_t,y^2_t)$ from the policy distribution $\pi_t \in \Pi$ (where $\Pi$ represents the set of all possible response distributions). A human evaluator then selects the preferred response, and the LLM is fine-tuned based on this preference feedback.

Here, we consider a simplified formulation of the problem. After choosing the policy $\pi_t$, the LLM has access to the reward function $r_t(y) = \mathbb{E}_{x_t}[ \mathbb{P}(y \succ \pi_t)|x_t] = \mathbb{E}_{y'\sim\pi_t}[\mathbb{P}(y \succ y') ]$, which denotes the expected win rate of response $y$ against the current policy $\pi_t$. Below, we present a Mirror Descent algorithm designed to address this problem.
\vspace{0.1in}
\hrule
\vspace{0.05in}
\noindent{\bf Online Mirror Descent}
\vspace{0.05in}
\hrule
\vspace{0.05in}

\noindent {\bf Initialization:} $\pi_1 \in \Pi$, Number of iterations $T$, learning rate $\eta$, preference oracle $\mathbb{P}$.

\noindent For $t=1,\cdots, T:$
\begin{itemize}
\item \noindent {\bf Learner:} Submit $\pi_t$ and construct response pairs.
\item {\bf Environment:} The preference oracle $\mathbb{P}$ outputs the reward function $r_t$.
\item {\bf Algorithm:} Conduct the following (Optimistic) Online Mirror Descent step to output the next-step policy $\pi_{t+1}$ (in the optimistic variant, a prediction $M_{t+1}$ is additionally given):
\begin{align*}
\pi_{t+1} = \argmax_{\pi \in \Pi} \langle \pi, r_t \rangle - \frac{1}{\eta} \text{KL}(\pi\|\pi_t).
\end{align*}
\end{itemize}
\vspace{0.02in}
\hrule
\vspace{0.1in}
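
\noindent For a finite response set, the KL step above admits a simple multiplicative form. The sketch below uses it to simulate the loop on a randomly generated preference matrix (an assumption made purely for illustration) and to evaluate, numerically, the DualGap defined below; treat the update line as a candidate closed form to be checked against your own derivation.
\begin{verbatim}
import numpy as np

def omd_preference(P, T=1000, eta=0.1):
    # P[i, j] = preference probability P(y_i beats y_j), with P + P.T = 1.
    K = P.shape[0]
    pi = np.full(K, 1.0 / K)       # pi_1: uniform over responses
    pi_bar = np.zeros(K)
    for _ in range(T):
        pi_bar += pi / T
        r = P @ pi                 # r_t(y) = E_{y' ~ pi_t}[P(y beats y')]
        pi = pi * np.exp(eta * r)  # candidate closed form of the KL step
        pi /= pi.sum()
    return pi_bar

def dual_gap(P, pi_bar):
    # max_{pi1} J(pi1, pi_bar) - min_{pi2} J(pi_bar, pi2)
    return np.max(P @ pi_bar) - np.min(pi_bar @ P)

rng = np.random.default_rng(0)
A = rng.uniform(size=(5, 5))
P = A / (A + A.T)                  # consistent preferences: P(i,j) + P(j,i) = 1
print(dual_gap(P, omd_preference(P)))
\end{verbatim}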

\noindent Let's consider the following problems (please provide detailed steps to justify your answers):
\begin{itemize}
\item Can you provide the closed-form updates for the above Online Mirror Descent algorithm?
\item To judge the performance of the algorithm, let $\bar \pi := \frac{1}{T}\sum_{t=1}^T \pi_t$ and $J(\pi_1,\pi_2):= \mathbb{E}_{x,y^1\sim \pi_1,~y^2\sim \pi_2}[\mathbb{P}(y^1\succ y^2) |x] =\mathbb{E}_{y^1\sim \pi_1,~y^2\sim \pi_2}[\mathbb{P}(y^1\succ y^2)]$. The corresponding DualGap is defined as
\begin{align*}
\text{DualGap}(\bar \pi):= \max_{\pi_1} J(\pi_1, \bar \pi) - \min_{\pi_2}J(\bar \pi,\pi_2).
\end{align*}
When $\text{DualGap}(\bar \pi)=0$, it indicates that the Nash equilibrium has been reached. Consequently, the objective is to minimize the DualGap, as it serves as a measure of how closely a policy approximates the Nash equilibrium. Please choose a proper $\eta$ and prove a DualGap bound for \textbf{Online Mirror Descent}.
\item For this problem, please write up the \textbf{Optimistic Online Mirror Descent} algorithm, set proper learning rates, and prove its DualGap bound.
\end{itemize}
(Hint: Find the relationship between \textit{Regret} and \textit{Dual Gap}, and recall the symmetric nature of the game.)
\newpage
\noindent {\bf Problem 3: (Adaptive) Gradient Descent Algorithms for Linear Regression.}
\vspace{0.5cm}

\noindent In this problem, you will train a linear regression model to predict real estate prices.
\vspace{0.1cm}

\noindent The dataset is available at: \url{https://archive.ics.uci.edu/dataset/477/real+estate+valuation+data+set}, where the first six fields are the features of the estate and the last field is the price. For this dataset, you need to:
\begin{itemize}
\item Split the data into training and test sets in a proper way and use a linear regression model to predict the price. You are asked to implement the \red{Gradient Descent Algorithm}, \red{Adaptive Gradient Descent Algorithm}, \red{Root Mean Square Propagation Algorithm}, and \red{Adam Algorithm} to train your model. (You may need to preprocess the data before training to ensure convergence.)
\item Please plot your training losses, test your models on your test dataset, and compare their performance.
\end{itemize}
\noindent Please implement your algorithms in Python $3$ and make sure your code runs.
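
\vspace{0.2cm}
\noindent For reference, here is a minimal full-batch Adam sketch on the mean-squared error of a linear model; the standardized design matrix \texttt{X} with an appended bias column and the hyperparameters are assumptions made for illustration, and the other three required optimizers differ only in how the update direction is formed from the gradient.
\begin{verbatim}
import numpy as np

def adam_linear_regression(X, y, lr=1e-2, beta1=0.9, beta2=0.999,
                           eps=1e-8, epochs=500):
    n, d = X.shape
    w = np.zeros(d)
    m, v = np.zeros(d), np.zeros(d)    # first/second moment estimates
    losses = []
    for t in range(1, epochs + 1):
        residual = X @ w - y
        losses.append(0.5 * np.mean(residual ** 2))
        g = X.T @ residual / n         # gradient of the (mean squared error)/2
        m = beta1 * m + (1 - beta1) * g
        v = beta2 * v + (1 - beta2) * g ** 2
        m_hat = m / (1 - beta1 ** t)   # bias-corrected moments
        v_hat = v / (1 - beta2 ** t)
        w -= lr * m_hat / (np.sqrt(v_hat) + eps)
    return w, losses
\end{verbatim}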

\end{document}
162 changes: 162 additions & 0 deletions courses/CS245/CS245.01_Spring_2025/Homework 作业/CS245_hw2.tex
@@ -0,0 +1,162 @@
\documentclass[11pt,oneside,a4paper]{article}
\usepackage[top=1 in, bottom=1 in, left=0.85 in, right=0.85 in]{geometry}
\usepackage{hyperref}
\usepackage{float}
\usepackage{subcaption}

\usepackage{amsmath,amssymb,graphicx,url, algorithm2e}
\usepackage{thmtools,thm-restate,wrapfig,enumitem,mathabx}
\newtheorem{assumption}{Assumption}
\newtheorem{theorem}{Theorem}
\newtheorem{remark}{Remark}
\newtheorem{lemma}{Lemma}
\newtheorem{corollary}{Corollary}
\newtheorem{proof}{Proof}
\usepackage{color}

\newcommand{\red}[1]{\textcolor{red}{#1}}
\newcommand{\blue}[1]{\textcolor{blue}{#1}}
\newcommand{\ceil}[1]{\lceil #1 \rceil}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}


\title{Online Optimization and Learning (CS245)}
\author{Name: ~~~~~~~~~~~~ID: ~~~~~~~~~~~~Email: ~~~~~~~~~~~~}
\date{}

\begin{document}

\maketitle
\noindent
\rule{\linewidth}{0.4pt}
{\bf {\large Rules:}}
\begin{enumerate}
\item Deadline: \textcolor{red}{\textbf{2025/4/14/23:59:59}}.\\ The grade of a late submission is subject to the decaying policy $(75\%, 50\%, 25\%)$.
\item Please typeset your homework in \LaTeX; handwritten submissions are not accepted.
\item Submit your homework to the TA (guohq@shanghaitech.edu.cn), including your PDF and code, with the filename ``{\sf name+id+CS245HW2.zip}''.
\item \textcolor{red}{Plagiarism is not allowed.} You will fail this homework if any plagiarism is detected.
\end{enumerate}
\vspace{-0.3cm}
\rule{\linewidth}{0.4pt}

\newpage
\noindent {\bf Problem 1: Explore-then-Exploit in Bandits}

\vspace{0.5cm}
\noindent
Explore-then-Exploit is a simple and efficient algorithm for $K$-armed bandit problems.
\vspace{0.1in}
\hrule
\vspace{0.05in}
\noindent{\bf Explore-then-Exploit}
\vspace{0.05in}
\hrule
\vspace{0.05in}

\noindent {\bf Initialization:} Time horizon $T$, exploration times $N$.

\noindent {\bf Exploration:} In the first phase, the choice of arms does not depend on the observed rewards, and each arm is played $N$ times.

\noindent {\bf Exploitation:} In all remaining rounds, the algorithm selects the arm with the highest empirical mean reward based on the exploration phase.

\vspace{0.02in}
\hrule
\vspace{0.5cm}
\noindent Consider the stochastic multi-armed bandit setting:
\begin{itemize}
\item After the $N$ explorations of each arm, derive an upper bound on the expected estimation error $|\mu_a - \bar \mu_a|$ for any arm $a$, where $\mu_a$ is the true mean reward and $\bar \mu_a$ is its empirical mean estimate.
\item Choose proper $N$, and prove an \textbf{upper bound} on the regret for the above algorithm.
\item Specialize the analysis to the two-armed case and establish a \textbf{lower bound} on the regret of the algorithm.
\end{itemize}
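
\vspace{0.2cm}
\noindent A minimal Bernoulli-bandit simulation of the algorithm may help sanity-check the choice of $N$ you derive; the Bernoulli rewards and the expected-regret bookkeeping below are assumptions made purely for illustration.
\begin{verbatim}
import numpy as np

def explore_then_exploit(means, T, N, seed=0):
    # means[a]: true mean reward of arm a (unknown to the learner); assumes K*N <= T.
    rng = np.random.default_rng(seed)
    K, best = len(means), np.max(means)
    totals, regret = np.zeros(K), 0.0
    # Exploration: play every arm N times, ignoring observed rewards.
    for a in range(K):
        for _ in range(N):
            totals[a] += rng.binomial(1, means[a])
            regret += best - means[a]
    # Exploitation: commit to the empirically best arm for the remaining rounds.
    a_hat = int(np.argmax(totals / N))
    regret += (T - K * N) * (best - means[a_hat])
    return regret
\end{verbatim}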

\vspace{0.5cm}
{\noindent \bf Solution:}


\newpage
\noindent {\bf Problem 2: Online Mirror Descent for Adversarial/Stochastic Bandits}
\vspace{0.5cm}

\noindent We have discussed that Online Mirror Descent can achieve good performance for adversarial/stochastic bandits.
\vspace{0.05in}
\hrule
\vspace{0.05in}
\noindent{\bf Online Mirror Descent for Adversarial/Stochastic Bandits}
\vspace{0.05in}
\hrule
\vspace{0.05in}

\noindent {\bf Initialization:} $x_1 = [1/K,...,1/K]$ and learning rate $\eta_t$. \\
\noindent For each round $t=1,\cdots, T:$
\begin{itemize}
\item {\bf Learner:} Sample an arm $i$ from $x_t$.
\item {\bf Environment:} Observe the reward of arm $i$: $r_t(i)$.
\item {\bf Estimator:} $\hat{r}_t(i) = r_t(i)/x_t(i)$ for the picked arm $i$, and $\hat{r}_t(j) = 0$ for every other arm $j$.
\item {\bf Update:} $x_{t+1} = \argmin_{x \in \mathcal K} \langle x, -\hat{r}_t \rangle + \frac{1}{\eta_t} B_{\Psi}(x;x_t).$
\end{itemize}
\vspace{0.02in}
\hrule
\vspace{0.1in}

\noindent If the regularizer $\Psi(x)$ is the negative entropy function, then $B_{\Psi}$ is the KL divergence and the algorithm becomes the classical EXP3 algorithm.
\vspace{0.1cm}
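
\noindent As a reference point for the comparison asked below, here is a minimal sketch of this EXP3 special case, written for the reward convention of the box above; the fixed learning rate and the \texttt{reward\_fn(t, i)} interface are assumptions made for illustration.
\begin{verbatim}
import numpy as np

def exp3(reward_fn, K, T, eta=0.05, seed=0):
    # Negative-entropy OMD: the mirror step becomes a multiplicative update.
    rng = np.random.default_rng(seed)
    x = np.full(K, 1.0 / K)
    for t in range(T):
        i = rng.choice(K, p=x)
        r = reward_fn(t, i)            # reward in [0, 1] of the picked arm
        r_hat = np.zeros(K)
        r_hat[i] = r / x[i]            # importance-weighted estimator
        x = x * np.exp(eta * r_hat)    # closed form of the entropic mirror step
        x /= x.sum()
    return x
\end{verbatim}
\vspace{0.1cm}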

\noindent Now we consider a different regularizer $\Psi(x) = -\sum_{i=1}^K \sqrt{x_i}.$
\begin{itemize}
\item For adversarial bandits, please provide the regret analysis of the algorithm with a proper adaptive learning rate $\eta_t$ and compare it with the regret of EXP3.
\item For stochastic bandits, please try to provide a possible problem-dependent regret analysis of the algorithm with a proper adaptive learning rate $\eta_t$.
\end{itemize}
\vspace{0.5cm}
{\noindent \bf Solution:}


\newpage
\noindent {\bf Problem 3: Bandit Algorithms}
\vspace{0.5cm}

\noindent Consider the following protocol of the bandit problem.
\vspace{0.05in}
\hrule
\vspace{0.05in}
\noindent{\bf Learning in Bandits}
\vspace{0.05in}
\hrule
\vspace{0.05in}

\noindent {\bf Initialization:} $K$ arms. \\
\noindent For each round $t=1,\cdots, T:$
\begin{itemize}
\item \noindent {\bf Learner:} Choose an arm $i \in [K].$
\item {\bf Environment:} Observe the loss of the picked arm, $\ell_{t,i}.$
\end{itemize}
\vspace{0.02in}
\hrule
\vspace{0.1in}

\noindent In this problem, we provide an environment with $K=32$ arms and $T=5000$ rounds, where in each round you receive the \textbf{loss} of your picked arm (note that, to be consistent with Homework $1$, the environment returns a loss instead of a reward).
\vspace{0.2cm}

\noindent Let's apply the following algorithms:
\begin{itemize}
\item Explore-then-exploit Algorithm in Problem $1$.
\item UCB Algorithm: A classical algorithm for stochastic bandits.
\item Thompson Sampling Algorithm: A classical algorithm for stochastic bandits.
\item EXP3 Algorithm: A classical algorithm for adversarial bandits.
\item Online Mirror Descent with a log-barrier regularizer.
\item Online Mirror Descent with $\Psi(x) = -\sum_i \sqrt{x_i}$ in Problem $2$.
\end{itemize}

\noindent As in Homework $1$, you are supposed to choose proper learning rates and plot the trajectories of the algorithms.
\vspace{0.2cm}

\noindent
Please read the code sample and implement the algorithms in Python $3$.
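
\vspace{0.2cm}
\noindent As a starting point, a minimal UCB1 sketch is given below; the \texttt{env.step(arm)} interface returning a loss in $[0, 1]$ is only a guess at what the provided code sample exposes, so adapt it to the actual environment.
\begin{verbatim}
import numpy as np

def ucb1(env, K=32, T=5000):
    # UCB1 on rewards defined as 1 - loss (losses assumed to lie in [0, 1]).
    counts = np.zeros(K)
    means = np.zeros(K)                 # empirical mean reward per arm
    losses = []
    for t in range(T):
        if t < K:
            arm = t                     # play each arm once first
        else:
            bonus = np.sqrt(2.0 * np.log(t + 1) / counts)
            arm = int(np.argmax(means + bonus))
        loss = env.step(arm)            # hypothetical environment interface
        losses.append(loss)
        counts[arm] += 1
        means[arm] += (1.0 - loss - means[arm]) / counts[arm]
    return losses
\end{verbatim}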

\vspace{0.2cm}
\noindent Note that after you submit the code, we will also test your algorithms in other environments.

\vspace{0.5cm}

\end{document}