Merge branch 'master' of http://62.234.201.16/hao/AAAI21_Emergent_language

d88bfadb · Zidong Du · aee4d501 · e019c58d · d88bfadb
Commit d88bfadb authored Sep 08, 2020 by Zidong Du
Show whitespace changes
Inline Side-by-side

Showing with 38 additions and 0 deletions

AAAI2021/paper.tex
+38 -0

No files found.
--- a/AAAI2021/paper.tex
+++ b/AAAI2021/paper.tex
@@ -21,6 +21,8 @@
 \frenchspacing  % DO NOT CHANGE THIS
 \setlength{\pdfpagewidth}{8.5in}  % DO NOT CHANGE THIS
 \setlength{\pdfpageheight}{11in}  % DO NOT CHANGE THIS
+\usepackage{algorithm}  
+\usepackage{algorithmic} 
 %\nocopyright
 %PDF Info Is REQUIRED.
 % For /Author, add all authors within the parentheses, separated by commas. No accents or commands.
@@ -191,6 +193,42 @@
 \input{tex/experiments.tex}
 \input{tex/last.tex}

+\begin{algorithm}[!h]
+	\caption{OurAlgorithm$(t,\hat{t})$}
+	\begin{algorithmic}[1]
+		\IF{Training the speaker agent S}
+		\FOR{Batch T randomly selected from $M_0\times M_1$}
+        \FOR{$t=(c_0,c_1)$ in T}
+        \STATE $P(s_0|t),P(s_1|t)=\pi_{old}^S(s=(s_0,s_1)|t)$
+        \STATE Sample $s_0$ with $P(s_0|t)$, $s_1$ with $P(s_1|t)$
+        \STATE $P(\hat{t}|s) = \pi^L(\hat{t}|s)$ 
+        \STATE Sample $\hat{t}$ with $P(\hat{t}|s)$
+        \STATE Get reward $R(\hat{t},t)$
+        \STATE $J(\theta^S,\theta^L)=E_{\pi_{old}^S,\pi^L}[R(\hat{t},t)\cdot\frac{\pi^S(s|t)}{\pi^S_{old}(s|t)}]$
+        \STATE Update $\theta^S$ by $\nabla_{\theta^S}J$
+        \ENDFOR
+        \STATE $\pi_{old}^S\leftarrow \pi^S$
+		\ENDFOR
+		\ENDIF
+	
+		\IF{Training the listener agent L}
+		\FOR{Batch T randomly selected from $M_0\times M_1$}
+		\FOR{$t=(c_0,c_1)$ in T}
+		\STATE $P(s_0|t),P(s_1|t)=\pi^S(s=(s_0,s_1)|t)$
+		\STATE Sample $s_0$ with $P(s_0|t)$, $s_1$ with $P(s_1|t)$
+		\STATE $P(\hat{t}|s) = \pi^L_{old}(\hat{t}|s)$ 
+		\STATE Sample $\hat{t}$ with $P(\hat{t}|s)$
+		\STATE Get reward $R(\hat{t},t)$
+		\STATE $J(\theta^S,\theta^L)=E_{\pi^S,\pi_{old}^L}[R(\hat{t},t)\cdot\frac{\pi^L(\hat{t}|s)}{\pi^L_{old}(\hat{t}|s)}]$
+		\STATE Update $\theta^L$ by $\nabla_{\theta^L}J$
+		\ENDFOR
+		\STATE $\pi_{old}^L\leftarrow \pi^L$
+		\ENDFOR
+		\ENDIF
+	\end{algorithmic}
+\end{algorithm}
+
+
 \bibliography{ref.bib}

 \end{document}