..

37549f10 · YZhao · 9160a492 · 37549f10 · 37549f10 · 37549f10
Commit 37549f10 authored Sep 10, 2020 by YZhao
Showing with 47 additions and 23 deletions

AAAI2021/fig/Figure5_An_emergent_language.pdf
+0 -0

AAAI2021/tex/experiments.tex
+37 -9

AAAI2021/tex/relatedwork.tex
+0 -4

AAAI2021/tex/theory.tex
+9 -9

AAAI2021/tex/theory2.tex
+1 -1

No files found.
--- a/AAAI2021/fig/Figure5_An_emergent_language.pdf
+++ b/AAAI2021/fig/Figure5_An_emergent_language.pdf
--- a/AAAI2021/tex/experiments.tex
+++ b/AAAI2021/tex/experiments.tex
@@ -18,14 +18,42 @@
  \label{fig:exp2}
 \end{figure}

-\begin{figure}[t]
+%\begin{figure}[t]
+%  \centering
+%  \includegraphics[width=0.99\columnwidth]{fig/Figure10_p_value.pdf}
+%  \caption{The Chi-square test between high-compositionality and agent
+%    capacity. (a) $MIS>0.99$. (b)
+%    $MIS>0.9$.}
+%  \label{fig:exp10}
+%\end{figure}
+
+\begin{table}[b]
  \centering
-  \includegraphics[width=0.99\columnwidth]{fig/Figure10_p_value.pdf}
-  \caption{The Chi-square test between high-compositionality and agent
-    capacity. (a) $MIS>0.99$. (b)
-    $MIS>0.9$.}
-  \label{fig:exp10}
-\end{figure}
+  \caption{The Chi-square test between high-compositionality and agent capacity.}
+  \label{tab:exp10}
+  \begin{tabular}{cccc}
+    \toprule
+    \multicolumn{4}{c}{$H_0$: $\mathit{MIS} > 0.90$ is independent of $h_{\mathit{size}}$}\\
+    \midrule
+    Vocabulary size & $\chi^2$ & $df$ & $p$-value \\
+    \midrule
+    4 & 22.20 & 10 & $1.41\times 10^{-2}$ \\
+    6 & 27.52 & 10 & $2.16\times 10^{-3}$ \\
+    10 & 64.46 & 10 & $5.14\times 10^{-10}$ \\
+    \bottomrule
+    \multicolumn{4}{c}{\vspace{1em}}\\
+    \toprule
+    \multicolumn{4}{c}{$H_0$: $\mathit{MIS} > 0.99$ is independent of $h_{\mathit{size}}$}\\
+    \midrule
+    Vocabulary size & $\chi^2$ & $df$ & $p$-value \\
+    \midrule
+    4 & 30.19 & 10 & $7.97\times 10^{-4}$ \\
+    6 & 25.96 & 10 & $3.80\times 10^{-3}$ \\
+    10 & 33.80 & 10 & $2.00\times 10^{-4}$ \\
+    \bottomrule
+    \end{tabular}
+  \end{table}
+

 \begin{figure}[t]
  \centering
@@ -84,8 +112,8 @@ more meanings, for the constraint from low capacity.

 Additionally, we perform a $\chi^2$ test to check the statistical
 significance of the association between high compositionality and agent
-capacity. Figure~\ref{fig:exp10} reports the $\chi^2$ test results for
-$MIS>0.99$ and $MIS>0.9$ in (a) and (b), respectively. It can be observed that
+capacity. Table~\ref{tab:exp10} reports the $\chi^2$ test results for
+$\mathit{MIS}>0.90$ and $\mathit{MIS}>0.99$. It can be observed that
 for every vocabulary size, the $p$-value is less than 0.05, which means that
 high compositionality is significantly associated with agent
 capacity.

--- a/AAAI2021/tex/relatedwork.tex
+++ b/AAAI2021/tex/relatedwork.tex
-<<<<<<< HEAD
 \section{Related works}
 \label{sec:relatedwork}

 \begin{table*}[b]
-=======
-\begin{table*}[htbp]
->>>>>>> 013236e0637a916d76a342113079f93be73ec3a7
  \centering
  \small
  \caption{Handcrafted inductions in related works.}

--- a/AAAI2021/tex/theory.tex
+++ b/AAAI2021/tex/theory.tex
@@ -29,8 +29,8 @@ to finish the game in a cooperative manner. In each round, once received an
 input object $t$, Speaker $S$ speaks a symbol sequence $s$ to Listener $L$;
 Listener $L$ reconstructs the predicted result $\hat{t}$ based on the listened
 sequence $s$; if $t=\hat{t}$, agents win this game and receive positive rewards
-($R(t,\hat{t})=1$); otherwise agents fail this game and receive negative rewards
-($R(t,\hat{t})=-1$).
+($r(t,\hat{t})=1$); otherwise agents fail this game and receive negative rewards
+($r(t,\hat{t})=-1$).

 Precisely, during the game, Speaker $S$ receives an input object $t$, which is
 an expression with two words from the vocabulary set $V$, i.e., two
@@ -84,7 +84,7 @@ Algorithm~\ref{al:learning}, we train the separate Speaker $S$ and Listener $L$ 
 Stochastic Policy Gradient methodology in a tick-tock manner, i.e., training one
 agent while keeping the other one fixed. Roughly, when training the Speaker, the
 target is set to maximize the expected reward
-$J(\theta_S, \theta_L)=E_{\pi_S,\pi_L}[R(t, \hat{t})]$ by adjusting the parameter
+$J(\theta_S, \theta_L)=E_{\pi_S,\pi_L}[r(t, \hat{t})]$ by adjusting the parameter
 $\theta_S$, where $\theta_S$ is the neural network parameters of Speaker $S$
 with learned output probability distribution $\pi_S$, and $\theta_L$ is the
 neural network parameters of Listener with learned probability distribution $\pi_L$.
@@ -97,9 +97,9 @@ use the predict result $\hat{t}$ of the listener agent as the
 evidence of whether to give the positive rewards. Then, the gradients of the
 expected reward $ J(\theta_S, \theta_L)$ can be calculated as follows:
 \begin{align}
-  \nabla_{\theta^S} J &= \mathbb{E}_{\pi^S, \pi^L} \left[ R(\hat{t}, t) \cdot
+  \nabla_{\theta^S} J &= \mathbb{E}_{\pi^S, \pi^L} \left[ r(\hat{t}, t) \cdot
    \nabla_{\theta^S} \log{\pi^S(s_0, s_1 | t)} \right] \\
-  \nabla_{\theta^L} J &= \mathbb{E}_{\pi^S, \pi^L} \left[ R(\hat{t}, t) \cdot
+  \nabla_{\theta^L} J &= \mathbb{E}_{\pi^S, \pi^L} \left[ r(\hat{t}, t) \cdot
    \nabla_{\theta^L} \log{\pi^L(\hat{t} | s_0, s_1)} \right]
 \end{align}

@@ -116,8 +116,8 @@ expected reward $ J(\theta_S, \theta_L)$ can be calculated as follows:
        \STATE Sample $s_0$ with $P(s_0|t)$, $s_1$ with $P(s_1|t)$
        \STATE $P(\hat{t}|s) = \pi^L(\hat{t}|s)$ 
        \STATE Sample $\hat{t}$ with $P(\hat{t}|s)$
-        \STATE Get reward $R(\hat{t},t)$
-        \STATE $J(\theta^S,\theta^L)=E_{\pi_{old}^S,\pi^L}[R(\hat{t},t)\cdot\frac{\pi^S(s|t)}{\pi^S_{old}(s|t)}]$
+        \STATE Get reward $r(\hat{t},t)$
+        \STATE $J(\theta^S,\theta^L)=E_{\pi_{old}^S,\pi^L}[r(\hat{t},t)\cdot\frac{\pi^S(s|t)}{\pi^S_{old}(s|t)}]$
        \STATE Update $\theta^S$ by $\nabla_{\theta^S}J$
        \ENDFOR
        \STATE $\pi_{old}^S\leftarrow \pi^S$
@@ -131,8 +131,8 @@ expected reward $ J(\theta_S, \theta_L)$ can be calculated as follows:
 		\STATE Sample $s_0$ with $P(s_0|t)$, $s_1$ with $P(s_1|t)$
 		\STATE $P(\hat{t}|s) = \pi^L_{old}(\hat{t}|s)$ 
 		\STATE Sample $\hat{t}$ with $P(\hat{t}|s)$
-		\STATE Get reward $R(\hat{t},t)$
-		\STATE $J(\theta^S,\theta^L)=E_{\pi_{old}^S,\pi^L}[R(\hat{t},t)\cdot\frac{\pi^L(s|t)}{\pi^L_{old}(s|t)}]$
+		\STATE Get reward $r(\hat{t},t)$
+		\STATE $J(\theta^S,\theta^L)=E_{\pi^S,\pi_{old}^L}[r(\hat{t},t)\cdot\frac{\pi^L(\hat{t}|s)}{\pi^L_{old}(\hat{t}|s)}]$
 		\STATE Update $\theta^L$ by $\nabla_{\theta^L}J$
 		\ENDFOR
 		\STATE $\pi_{old}^L\leftarrow \pi^L$

--- a/AAAI2021/tex/theory2.tex
+++ b/AAAI2021/tex/theory2.tex
@@ -36,7 +36,7 @@ Each column of $M$ correspond to the semantic information carried by one symbol.

 \begin{figure}[t]
  \centering
-  \includegraphics[width=\columnwidth]{fig/Figure5_An_emergent_language.pdf}
+  \includegraphics[width=0.8\columnwidth]{fig/Figure5_An_emergent_language.pdf}
  \caption{An emergent language whose non-compositionality cannot be measured by the unilateral metrics. Notice that given $s_1 = \mathrm{a}$, the listener can determine neither the shape nor the color without knowledge of $s_0$.}
  \label{fig:unilateral}
 \end{figure}