Commit f40a1f4e by YZhao
parents 179ff574 57dfb313
...@@ -8,6 +8,8 @@
\newcommand{\rmk}[1]{\textcolor{red}{--[#1]--}}
\newcommand{\note}[1]{\textcolor{red}{#1}}
\usepackage{enumitem}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{aaai21} % DO NOT CHANGE THIS
\usepackage{times} % DO NOT CHANGE THIS
...@@ -95,7 +97,7 @@
% articles, conjunctions, and prepositions are lower case unless they
% directly follow a colon or long dash
\title{Enabling the Emergence of Symbolic Language without Handcrafted Inductions}
\author{
%Authors
% All authors must be in the same font size and format.
...@@ -177,7 +179,7 @@
inductions.
In this paper, we are the first to successfully achieve highly compositional symbolic
language in a \emph{natural} manner without handcrafted inductions.
Initially, by thoroughly investigating the compositionality of the emergent symbolic
language after removing the \emph{deliberately handcrafted}
inductions, we observe that the agent capacity plays a key role in
...@@ -194,7 +196,7 @@
experimental results lead to a counter-intuitive conclusion that lower agent
capacity facilitates the emergence of symbolic language with higher
compositionality. \note{Based on our conclusion, we can generate symbolic
language of higher compositionality with a higher probability.}
% The natural emergence of symbolic languages with high compositionality has
...@@ -224,42 +226,8 @@
\input{tex/experiments.tex}
\input{tex/last.tex}
\clearpage
\newpage
\bibliography{ref.bib}
\end{document}
...@@ -11,4 +11,53 @@
  timestamp = {Thu, 04 Apr 2019 13:20:09 +0200},
  biburl    = {https://dblp.org/rec/bib/conf/iclr/WuLCS18},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{kottur-etal-2017-natural,
title = "Natural Language Does Not Emerge {`}Naturally{'} in Multi-Agent Dialog",
author = "Kottur, Satwik and
Moura, Jos{\'e} and
Lee, Stefan and
Batra, Dhruv",
booktitle = "Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/D17-1321",
doi = "10.18653/v1/D17-1321",
pages = "2962--2967",
abstract = "A number of recent works have proposed techniques for end-to-end learning of communication protocols among cooperative multi-agent populations, and have simultaneously found the emergence of grounded human-interpretable language in the protocols developed by the agents, learned without any human supervision! In this paper, using a Task {\&} Talk reference game between two agents as a testbed, we present a sequence of {`}negative{'} results culminating in a {`}positive{'} one {--} showing that while most agent-invented languages are effective (i.e. achieve near-perfect task rewards), they are decidedly not interpretable or compositional. In essence, we find that natural language does not emerge {`}naturally{'},despite the semblance of ease of natural-language-emergence that one may gather from recent literature. We discuss how it is possible to coax the invented languages to become more and more human-like and compositional by increasing restrictions on how two agents may communicate.",
}
@article{kirby2015compression,
title={Compression and communication in the cultural evolution of linguistic structure},
author={Kirby, Simon and Tamariz, Monica and Cornish, Hannah and Smith, Kenny},
journal={Cognition},
volume={141},
pages={87--102},
year={2015},
publisher={Elsevier}
}
@inproceedings{lazaridou2018emergence,
title={Emergence of Linguistic Communication from Referential Games with Symbolic and Pixel Input},
author={Lazaridou, Angeliki and Hermann, Karl Moritz and Tuyls, Karl and Clark, Stephen},
booktitle={International Conference on Learning Representations},
year={2018}
}
@inproceedings{li2019ease,
title={Ease-of-teaching and language structure from emergent communication},
author={Li, Fushan and Bowling, Michael},
booktitle={Advances in Neural Information Processing Systems},
pages={15851--15861},
year={2019}
}
@inproceedings{evtimova2018emergent,
title={Emergent Communication in a Multi-Modal, Multi-Step Referential Game},
author={Evtimova, Katrina and Drozdov, Andrew and Kiela, Douwe and Cho, Kyunghyun},
booktitle={International Conference on Learning Representations},
year={2018}
}
\ No newline at end of file
\section{Introduction}
\label{sec:introduction}
The emergence of symbolic language has always been an important issue,
which attracts attention from a broad range of communities,
including philology~\cite{}, biology~\cite{}, and computer
science~\cite{}. Especially in computer science, recent efforts try to explore
the emergence of symbolic language in virtual multi-agent environments, where
agents are trained to communicate with neural-network-based methods such as deep
reinforcement learning~\cite{}.
%Such works can be roughly classified into two categories,
%referential game~\cite{} and multi-agent reinforcement learning (MARL)~\cite{}, based on
%the environment setting.
The quality of emergent symbolic language is typically measured by its \emph{compositionality}.
Compositionality is a principle that determines
whether the meaning of a complex expression (e.g., a phrase), which is assembled out of a
given set of simple components (e.g., symbols), can be determined by its
constituent components and the rules that combine them~\cite{}.
\note{For example, the expression ``AAAI is a conference'' consists of two
meaningful words ``AAAI'' and ``conference'', and a rule for definition (``is'').
More recently, measuring the compositionality \note{xxxxx}.}
%It
...@@ -38,45 +37,46 @@ More recently, measuring the compositionality \note{xxxxx}.
\centering
\includegraphics[width=0.9\columnwidth]{fig/occupy}
\caption{\rmk{compositionality.}}
\label{fig:induction}
\end{figure}
Prior studies focus on achieving highly compositional symbolic language
through \emph{deliberately handcrafted} inductions, e.g., small vocabulary
sizes~\cite{}, memoryless agents~\cite{}, carefully constructed rewards~\cite{}, and
ease-of-teaching~\cite{}. \note{A possible intuition is that highly compositional symbolic
language cannot emerge without such inductions in existing multi-agent environments.}
Figure~\ref{fig:induction} reports the compositionality when training two agents in the widely-used
listener-speaker referential game, and it can be observed that \note{the compositionality
of the emergent symbolic language is extremely low without any induction}.
Though such unnatural inductions are useful, they prevent us from better understanding the mystery of
the emergence of language and even intelligence among our pre-human ancestors.
Yet, few works investigate the emergence of highly compositional symbolic language
\emph{naturally}, i.e., without handcrafted inductions.
In other words, it is never clear whether a \emph{natural}
environment and agents are sufficient for achieving high compositionality.
In this paper, we are the first to achieve highly compositional
symbolic language without any deliberately handcrafted induction. The key observation,
obtained by thoroughly analyzing the compositionality after removing the inductions in
the most widely-used listener-speaker referential game framework, is that the internal
\emph{agent capacity} plays a crucial role in the compositionality of symbolic language.
Concretely, the relationship between the agent capacity and the compositionality
of symbolic language is characterized both theoretically and experimentally.
%theoretically
Regarding the theoretical analysis, we use the
\note{Markov Series Channel (MSC)~\cite{} to model the language transmission process and a
novel mutual information-based metric to measure the compositionality quantitatively}.
%experimentally
Regarding the experimental validation, two different dedicated experiments, i.e.,
\note{XXX and XXX, are utilized for XXX}.
%Regarding the experimental validation, it is conducted on a listener-speaker
%referential game framework with eliminated unnatural inductions.
Both the theoretical analysis and experimental results lead to a counter-intuitive
conclusion that \emph{lower agent capacity facilitates the emergence of symbolic language
with higher compositionality}. \note{Therefore, by only reducing the agent capacity
in such a natural environment, we
can generate symbolic language of higher compositionality with a higher probability.}
%Prior studies focus on investigating how to affect the
...@@ -167,8 +167,9 @@ Both theoretical analysis and
In this paper, we make the following contributions:
\begin{itemize}[topsep=0pt,itemsep=0cm]
\item To the best of our knowledge, we are the first to successfully achieve
highly compositional symbolic
language naturally, without any deliberately handcrafted induction.
\item We thoroughly analyze the compositionality of the emergent symbolic language
after removing deliberately handcrafted inductions, and confirm that the agent
capacity acts as a key factor for compositionality.
...
...@@ -4,10 +4,11 @@
%external environmental factors
Previous works focus on the external environmental factors that impact the
compositionality of the emergent symbolic language.
For example, \citet{kirby2015compression} explored how the pressures for compressibility and expressivity lead to structured language.
\citet{kottur-etal-2017-natural} constrained the vocabulary size and the memory of the listener to coax compositionality out of the emergent language.
\citet{lazaridou2018emergence} showed that the degree of structure found in the input data affects the emergence of symbolic language.
\citet{li2019ease} studied how the ease-of-teaching pressure impacts the structure of the emergent language in an iterated population regime.
\citet{evtimova2018emergent} designed a novel multi-modal scenario, in which the speaker and the listener access different modalities of the input object, to explore language emergence.
Such factors are deliberately designed and are too idealized to hold in
the real world. None of these works realizes the importance of the model capacity of
the agent itself. \rmk{this should be largely emphasized.}
...
...@@ -24,35 +24,22 @@ In this paper, the task is xxxx.
\textbf{Game rules} In our referential game, agents follow the rules below
to finish the game in a cooperative manner. In each round, once receiving an
input object $t$, Speaker $S$ speaks a symbol sequence $s$ to Listener $L$;
Listener $L$ reconstructs the predicted result $\hat{t}$ based on the received
sequence $s$; if $t=\hat{t}$, the agents win the game and receive positive rewards
($R(t,\hat{t})=1$); otherwise, the agents fail the game and receive negative rewards
($R(t,\hat{t})=-1$).
Precisely, during the game, Speaker $S$ receives an input object $t$, which is
an expression with two words from the vocabulary set $V$, i.e., two
one-hot vectors representing shape and color, respectively. Based on $t$,
Speaker $S$ speaks a symbol sequence $s$, which similarly contains two words
from $V$. The Listener $L$ receives $s$ and outputs the predicted result $\hat{t}$,
a single word (one-hot vector) selected from the Cartesian product $V\times V$ of the
two vocabulary sets, which represents all meanings expressible by two combined words from $V$.
Please note that since $t$ and $\hat{t}$ have different lengths, we say
$t=\hat{t}$ if $t$ expresses the same meaning as $\hat{t}$, e.g.,
$t=([0,0,1],[0,1,0])$ equals $\hat{t}=[0,0,0,0,0,0,0,1,0]$ if they both mean ``red
circle''.
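To make the encodings and the reward concrete, the following is a minimal
sketch (Python/NumPy; an illustrative assumption rather than the authors'
released code, and the shape-major index $i\cdot|V|+j$ is likewise assumed) of
the input object $t$, the predicted result $\hat{t}$, and the meaning-level
equality check used for the reward:
\begin{verbatim}
# Minimal sketch of the encodings (assumed, for illustration only).
import numpy as np

V = 3  # per-word vocabulary size, chosen only for this example

def one_hot(i, n):
    v = np.zeros(n)
    v[i] = 1.0
    return v

# Input object t = (c0, c1): two one-hot words, e.g. shape index 2, color index 1.
t = (one_hot(2, V), one_hot(1, V))

# Predicted result t_hat: a single one-hot word over the product space V x V.
def encode_pair(i, j):
    return one_hot(i * V + j, V * V)

def meaning_equal(t, t_hat):
    # t equals t_hat when t_hat is the one-hot of the same (c0, c1) pair.
    i, j = int(np.argmax(t[0])), int(np.argmax(t[1]))
    return int(np.argmax(t_hat)) == i * V + j

def reward(t, t_hat):
    return 1.0 if meaning_equal(t, t_hat) else -1.0

t_hat = encode_pair(2, 1)   # "red circle" expressed as a single word
print(reward(t, t_hat))     # 1.0
\end{verbatim}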
...@@ -67,29 +54,91 @@ predict result indicate the same object.
\label{fig:agents}
\end{figure}
Figure~\ref{fig:agents} shows the architecture of the constructed agents,
including the Speaker $S$ and Listener $L$.

\textbf{Speaker.} The Speaker $S$ is constructed as a three-layer neural
network. It processes the input object $t$ with a fully-connected
layer to obtain the hidden layer $h^S$, which is split into two sub-layers. Each
sub-layer is further processed with a fully-connected layer to obtain the output
layer. The output layer results indicate the probability distributions of symbols
given the input object $t$, i.e., $o_i^{S}=P(s_i|t)$, $i\in\{0,1\}$. \note{The final
readout symbols are sampled based on these probability distributions.}

\textbf{Listener.} The Listener $L$ is also constructed as a three-layer
neural network. Different from Speaker $S$, which splits its hidden layer
into two sub-layers, $L$ concatenates its two hidden sub-layers into one
output layer. The output layer results give the probability distribution of
the predicted result $\hat{t}$ given the input sequence $s$, i.e., $o^{L}=P(\hat{t}|s_0,s_1)$.
\note{The final readout result is sampled based on this probability distribution.}
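As an illustration of the two architectures, the following PyTorch-style
sketch shows one way the Speaker and Listener described above could be
realized; the layer sizes, activation functions, and the hidden size
\verb|h_size| are our assumptions, not the authors' released implementation.
\begin{verbatim}
# Assumed PyTorch sketch of the Speaker/Listener architectures.
import torch
import torch.nn as nn

class Speaker(nn.Module):
    # Input object t (concatenated one-hots) -> two symbol distributions over V.
    def __init__(self, t_dim, h_size, v_size):
        super().__init__()
        self.fc_in = nn.Linear(t_dim, 2 * h_size)      # hidden layer h^S
        self.fc_out0 = nn.Linear(h_size, v_size)       # sub-layer for s_0
        self.fc_out1 = nn.Linear(h_size, v_size)       # sub-layer for s_1

    def forward(self, t):
        h = torch.relu(self.fc_in(t))
        h0, h1 = h.chunk(2, dim=-1)                    # split into two sub-layers
        p0 = torch.softmax(self.fc_out0(h0), dim=-1)   # P(s_0 | t)
        p1 = torch.softmax(self.fc_out1(h1), dim=-1)   # P(s_1 | t)
        return p0, p1

class Listener(nn.Module):
    # Symbols (s_0, s_1) as one-hots -> distribution over the |V|^2 meanings.
    def __init__(self, v_size, h_size, out_dim):
        super().__init__()
        self.fc0 = nn.Linear(v_size, h_size)
        self.fc1 = nn.Linear(v_size, h_size)
        self.fc_out = nn.Linear(2 * h_size, out_dim)

    def forward(self, s0, s1):
        h = torch.cat([torch.relu(self.fc0(s0)),
                       torch.relu(self.fc1(s1))], dim=-1)  # concatenate sub-layers
        return torch.softmax(self.fc_out(h), dim=-1)       # P(t_hat | s_0, s_1)
\end{verbatim}
In this sketch, varying \verb|h_size| corresponds to varying the model
capacity of the agents discussed in the experiments.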
\subsection{Learning algorithm}
\label{ssec:training}
To remove all the handcrafted inductions as well as for a more realistic
scenario, the agents in this referential game are independent of each other,
without sharing model parameters or architectural connections. As shown in
Algorithm~\ref{al:learning}, we train the separate Speaker $S$ and Listener $L$ with
a stochastic policy gradient methodology in a tick-tock manner, i.e., training one
agent while keeping the other one fixed. Roughly, when training the Speaker, the
target is set to maximize the expected reward
$J(\theta^S, \theta^L)=\mathbb{E}_{\pi^S,\pi^L}[R(t, \hat{t})]$ by adjusting the parameters
$\theta^S$, where $\theta^S$ denotes the neural network parameters of Speaker $S$
with learned output probability distribution $\pi^S$, and $\theta^L$ denotes the
neural network parameters of Listener $L$ with learned probability distribution $\pi^L$.
Similarly, when training the Listener, the target is set to maximize the
expected reward $J(\theta^S, \theta^L)$ by fixing the parameters $\theta^S$ and
adjusting the parameters $\theta^L$.

Additionally, to avoid handcrafted induction on the emergent language, we only
use the predicted result $\hat{t}$ of the listener agent as the
evidence for whether to give the positive reward. Then, the gradients of the
expected reward $J(\theta^S, \theta^L)$ can be calculated as follows:
\begin{align}
    \nabla_{\theta^S} J &= \mathbb{E}_{\pi^S, \pi^L} \left[ R(\hat{t}, t) \cdot
    \nabla_{\theta^S} \log{\pi^S(s_0, s_1 | t)} \right] \\
    \nabla_{\theta^L} J &= \mathbb{E}_{\pi^S, \pi^L} \left[ R(\hat{t}, t) \cdot
    \nabla_{\theta^L} \log{\pi^L(\hat{t} | s_0, s_1)} \right]
\end{align}
\begin{algorithm}[t]
\caption{Learning Algorithm$(t,\hat{t})$}
\label{al:learning}
\small
\begin{algorithmic}[1]
\IF{Training the speaker agent S}
\FOR{Batch T randomly selected from $M_0\times M_1$}
\FOR{$t=(c_0,c_1)$ in T}
\STATE $P(s_0|t),P(s_1|t)=\pi_{old}^S(s=(s_0,s_1)|t)$
\STATE Sample $s_0$ with $P(s_0|t)$, $s_1$ with $P(s_1|t)$
\STATE $P(\hat{t}|s) = \pi^L(\hat{t}|s)$
\STATE Sample $\hat{t}$ with $P(\hat{t}|s)$
\STATE Get reward $R(\hat{t},t)$
\STATE $J(\theta^S,\theta^L)=E_{\pi_{old}^S,\pi^L}[R(\hat{t},t)\cdot\frac{\pi^S(s|t)}{\pi^S_{old}(s|t)}]$
\STATE Update $\theta^S$ by $\nabla_{\theta^S}J$
\ENDFOR
\STATE $\pi_{old}^S\leftarrow \pi^S$
\ENDFOR
\ENDIF
\IF{Training the listener agent L}
\FOR{Batch T randomly selected from $M_0\times M_1$}
\FOR{$t=(c_0,c_1)$ in T}
\STATE $P(s_0|t),P(s_1|t)=\pi^S(s=(s_0,s_1)|t)$
\STATE Sample $s_0$ with $P(s_0|t)$, $s_1$ with $P(s_1|t)$
\STATE $P(\hat{t}|s) = \pi^L_{old}(\hat{t}|s)$
\STATE Sample $\hat{t}$ with $P(\hat{t}|s)$
\STATE Get reward $R(\hat{t},t)$
\STATE $J(\theta^S,\theta^L)=E_{\pi^S,\pi_{old}^L}[R(\hat{t},t)\cdot\frac{\pi^L(\hat{t}|s)}{\pi^L_{old}(\hat{t}|s)}]$
\STATE Update $\theta^L$ by $\nabla_{\theta^L}J$
\ENDFOR
\STATE $\pi_{old}^L\leftarrow \pi^L$
\ENDFOR
\ENDIF
\end{algorithmic}
\end{algorithm}
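For concreteness, the sketch below (Python/PyTorch; an assumed implementation
with illustrative hyper-parameters, not the authors' code) walks through one
speaker-training pass over $M_0\times M_1$ in the tick-tock scheme. It follows
the plain REINFORCE gradient estimators given above rather than the
importance-weighted surrogate of Algorithm~\ref{al:learning}; the listener
phase is symmetric, freezing the speaker and using $\log\pi^L(\hat{t}|s_0,s_1)$ instead.
\begin{verbatim}
# Assumed sketch of one speaker-training pass (listener frozen).
import itertools
import torch
import torch.nn as nn

M, V, H = 3, 3, 8   # meanings per attribute, vocab size, h_size (illustrative)
speaker  = nn.Sequential(nn.Linear(2 * M, H), nn.ReLU(), nn.Linear(H, 2 * V))
listener = nn.Sequential(nn.Linear(2 * V, H), nn.ReLU(), nn.Linear(H, M * M))
opt_s = torch.optim.Adam(speaker.parameters(), lr=1e-3)

def one_hot(i, n):
    return torch.eye(n)[i]

def speaker_step(c0, c1):
    # REINFORCE update of the speaker on one input object t = (c0, c1).
    t = torch.cat([one_hot(c0, M), one_hot(c1, M)])
    dist = torch.distributions.Categorical(logits=speaker(t).view(2, V))
    s = dist.sample()                                   # sampled symbols (s_0, s_1)
    with torch.no_grad():                               # listener is kept fixed
        p_t = listener(torch.cat([one_hot(s[0], V), one_hot(s[1], V)]))
        t_hat = torch.distributions.Categorical(logits=p_t).sample()
    r = 1.0 if t_hat.item() == c0 * M + c1 else -1.0    # reward R(t_hat, t)
    loss = -r * dist.log_prob(s).sum()                  # -R * log pi^S(s | t)
    opt_s.zero_grad()
    loss.backward()
    opt_s.step()

for c0, c1 in itertools.product(range(M), range(M)):    # one pass over M_0 x M_1
    speaker_step(c0, c1)
\end{verbatim}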