Commit f40a1f4e by YZhao
parents 179ff574 57dfb313
......@@ -8,6 +8,8 @@
\newcommand{\rmk}[1]{\textcolor{red}{--[#1]--}}
\newcommand{\note}[1]{\textcolor{red}{#1}}
\usepackage{enumitem}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{aaai21} % DO NOT CHANGE THIS
\usepackage{times} % DO NOT CHANGE THIS
......@@ -95,7 +97,7 @@
% articles, conjunctions, and prepositions are lower case unless they
% directly follow a colon or long dash
\title{Revisiting the Natural Emergence of Symbolic Language with Agent Capacity}
\title{Enabling the Emergence of Symbolic Language without Handcrafted Inductions}
\author{
%Authors
% All authors must be in the same font size and format.
......@@ -177,7 +179,7 @@
inductions.
In this paper, we are the first to successfully achieve high compositional symbolic
language in a \emph{natural} manner.
language in a \emph{natural} manner without handcrafted inductions.
Initially, by thoroughly investigating the compositionality of emerged symbolic
language after removing the \emph{deliberately handcrafted}
inductions, we observe that the agent capacity plays a key role in
......@@ -194,7 +196,7 @@
experimental results lead to a counter-intuitive conclusion that lower agent
capacity facilitates the emergence of symbolic language with higher
compositionality. \note{Based on our conclusion, we can generate higher
compositional symbolic language with a high probability.}
compositional symbolic language with a higher probability.}
% The natural emergence of symbolic languages with high compositionality has
......@@ -224,42 +226,8 @@
\input{tex/experiments.tex}
\input{tex/last.tex}
\begin{algorithm}[!h]
\caption{OurAlgorithm$(t,\hat{t})$}
\begin{algorithmic}[1]
\IF{Training the speaker agent S}
\FOR{Batch T randomly selected from $M_0\times M_1$}
\FOR{$t=(c_0,c_1)$ in T}
\STATE $P(s_0|t),P(s_1|t)=\pi_{old}^S(s=(s_0,s_1)|t)$
\STATE Sample $s_0$ with $P(s_0|t)$, $s_1$ with $P(s_1|t)$
\STATE $P(\hat{t}|s) = \pi^L(\hat{t}|s)$
\STATE Sample $\hat{t}$ with $P(\hat{t}|s)$
\STATE Get reward $R(\hat{t},t)$
\STATE $J(\theta^S,\theta^L)=E_{\pi_{old}^S,\pi^L}[R(\hat{t},t)\cdot\frac{\pi^S(s|t)}{\pi^S_{old}(s|t)}]$
\STATE Update $\theta^S$ by $\bigtriangledown_{\theta^S}J$
\ENDFOR
\STATE $\pi_{old}^S\leftarrow \pi^S$
\ENDFOR
\ENDIF
\IF{Training the listener agent L}
\FOR{Batch T randomly selected from $M_0\times M_1$}
\FOR{$t=(c_0,c_1)$ in T}
\STATE $P(s_0|t),P(s_1|t)=\pi^S(s=(s_0,s_1)|t)$
\STATE Sample $s_0$ with $P(s_0|t)$, $s_1$ with $P(s_1|t)$
\STATE $P(\hat{t}|s) = \pi^L_{old}(\hat{t}|s)$
\STATE Sample $\hat{t}$ with $P(\hat{t}|s)$
\STATE Get reward $R(\hat{t},t)$
\STATE $J(\theta^S,\theta^L)=E_{\pi^S,\pi_{old}^L}[R(\hat{t},t)\cdot\frac{\pi^L(\hat{t}|s)}{\pi^L_{old}(\hat{t}|s)}]$
\STATE Update $\theta^L$ by $\nabla_{\theta^L}J$
\ENDFOR
\STATE $\pi_{old}^L\leftarrow \pi^L$
\ENDFOR
\ENDIF
\end{algorithmic}
\end{algorithm}
\clearpage
\newpage
\bibliography{ref.bib}
\end{document}
......@@ -12,3 +12,52 @@
biburl = {https://dblp.org/rec/bib/conf/iclr/WuLCS18},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{kottur-etal-2017-natural,
title = "Natural Language Does Not Emerge {`}Naturally{'} in Multi-Agent Dialog",
author = "Kottur, Satwik and
Moura, Jos{\'e} and
Lee, Stefan and
Batra, Dhruv",
booktitle = "Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/D17-1321",
doi = "10.18653/v1/D17-1321",
pages = "2962--2967",
abstract = "A number of recent works have proposed techniques for end-to-end learning of communication protocols among cooperative multi-agent populations, and have simultaneously found the emergence of grounded human-interpretable language in the protocols developed by the agents, learned without any human supervision! In this paper, using a Task {\&} Talk reference game between two agents as a testbed, we present a sequence of {`}negative{'} results culminating in a {`}positive{'} one {--} showing that while most agent-invented languages are effective (i.e. achieve near-perfect task rewards), they are decidedly not interpretable or compositional. In essence, we find that natural language does not emerge {`}naturally{'},despite the semblance of ease of natural-language-emergence that one may gather from recent literature. We discuss how it is possible to coax the invented languages to become more and more human-like and compositional by increasing restrictions on how two agents may communicate.",
}
@article{kirby2015compression,
title={Compression and communication in the cultural evolution of linguistic structure},
author={Kirby, Simon and Tamariz, Monica and Cornish, Hannah and Smith, Kenny},
journal={Cognition},
volume={141},
pages={87--102},
year={2015},
publisher={Elsevier}
}
@inproceedings{lazaridou2018emergence,
title={Emergence of Linguistic Communication from Referential Games with Symbolic and Pixel Input},
author={Lazaridou, Angeliki and Hermann, Karl Moritz and Tuyls, Karl and Clark, Stephen},
booktitle={International Conference on Learning Representations},
year={2018}
}
@inproceedings{li2019ease,
title={Ease-of-teaching and language structure from emergent communication},
author={Li, Fushan and Bowling, Michael},
booktitle={Advances in Neural Information Processing Systems},
pages={15851--15861},
year={2019}
}
@inproceedings{evtimova2018emergent,
title={Emergent Communication in a Multi-Modal, Multi-Step Referential Game},
author={Evtimova, Katrina and Drozdov, Andrew and Kiela, Douwe and Cho, Kyunghyun},
booktitle={International Conference on Learning Representations},
year={2018}
}
\ No newline at end of file
\section{Introduction}
\label{sec:introduction}
The emergence of symbolic language has always been an important and controversial
issue. This problem attracts attentions from a broad range of communities,
The emergence of symbolic language has always been an important issue,
which attracts attention from a broad range of communities,
including philology~\cite{}, biology~\cite{}, and computer
science~\cite{}. Especially in computer science, efforts in recent years try to explore
the emergence of symbolic language in virtual, multi-agent environments, where
agents are trained to communicate with neural network based method, i.e., deep
reinforcement learning~\cite{}. For example, \note{XXXX}
the emergence of symbolic language in virtual multi-agent environments, where
agents are trained to communicate with neural network based methods such as deep
reinforcement learning~\cite{}.
%Such works can be roughly classified into two categories,
%referential game~\cite{} and multi-agent reinforcement learning (MARL)~\cite{}, based on
%the environment setting.
Compositionality is widely used and
taken as an important metric to evaluate the emerged symbolic language.
Originally, compositionality is a principle that
whether the meaning of a complex expression (e.g, phase), which is assembled out of the
The quality of emergent symbolic language is typically measured by its \emph{compositionality}.
Compositionality is a principle that determines
whether the meaning of a complex expression (e.g., a phrase), which is assembled out of a
given set of simple components (e.g., symbols), can be determined by its
constituent components and the rules that combine them~\cite{}.
\note{For example, the expression ``AAAI is a conference'' consists of two
meaningful words ``AAAI'' and ``conference'', and a rule for definition (``is'').
More recently, measuring the compositionality \note{xxxxx}.
meaningful words ``AAAI'' and ``conference'', and a rule for definition (``is'').
More recently, measuring the compositionality \note{xxxxx}.}
%It
......@@ -38,45 +37,46 @@ More recently, measuring the compositionality \note{xxxxx}.
\centering
\includegraphics[width=0.9\columnwidth]{fig/occupy}
\caption{\rmk{compositionality.}}
\label{fig:symbols}
\label{fig:induction}
\end{figure}
Prior studies focus on achieving high compositionality
Prior studies focus on achieving high compositional symbolic language
through \emph{deliberately handcrafted} inductions, e.g., small vocabulary
sizes~\cite{}, memoryless~\cite{}, carefully constructed rewards~\cite{}, and
ease-of-teaching~\cite{}. \note{xxxxxxx}
However, these unnatural inductions prevent us from better understanding the mystery of
ease-of-teaching~\cite{}. \note{The possible intuition is that high compositional symbolic
language cannot emerge without inductions in existing multi-agent environments.}
Figure~\ref{fig:induction} reports the compositionality when training two agents in the widely-used
listener-speaker referential game, and it can be observed that \note{the compositionality
of emerged symbolic language is extremely low without any induction}.
Though such unnatural inductions are useful, they prevent us from better understanding the mystery of
the emergence of language and even intelligence among our pre-human ancestors.
Yet, few works investigate the emergence of high compositional symbolic language
\emph{naturally}, i.e., without \emph{deliberately
handcrafted} inductions.
As a results, it is never clear whether \emph{natural}
environment and agent are sufficient for compositionality.
\emph{naturally}, i.e., without handcrafted inductions.
In other words, it is never clear whether \emph{natural}
environment and agents are sufficient for achieving high compositionality.
In this work, we focus on the emergence of high compositional symbolic
language naturally, without any handcrafted induction.
Initially, we thoroughly analyze the compositionality of emerged symbolic
language after removing the \emph{deliberately handcrafted}
inductions. Figure~\ref{fig:comp} reports the compositionality when train two
agents in a listener-speaker referential game. It can be observed that \note{it
is challenging to achieve high compositionality without induction as
xxxxxx}. Moreover, we observe that the agent capacity plays a key role in
compositionality, see Figure xxx.
We reveal and characterize the quantitative relationship
between the agent capacity and the compositionality of symbolic language both
theoretically and experimentally.
In this paper, we are the first work to achieve high compositional
symbolic language without any deliberately handcrafted induction. The key observation
is that the internal \emph{agent capacity} plays a crucial role in the compositionality
of symbolic language,
by thoroughly analyzing the compositionality after removing the inductions in
the most widely-used listener-speaker referential game framework.
Concretely, the relationship between the agent capacity and the compositionality
of symbolic language is characterized both theoretically and experimentally.
%theoretically
Regarding the theoretical analysis, we use the
Markov Series Channel (MSC)~\cite{} to model the language transmission process and a
novel mutual information-based metric to measure the compositionality quantitatively.
\note{Markov Series Channel (MSC)~\cite{} to model the language transmission process and a
novel mutual information-based metric to measure the compositionality quantitatively}.
%experimentally
Regarding the experimental verification, it is conducted on a listener-speaker
referential game framework with eliminated unnatural inductions.
Both theoretical analysis and
experimental results lead to a counter-intuitive conclusion that lower agent
capacity facilitates the emergence of symbolic language with higher
compositionality.
Regarding the experimental validation, two different dedicated experiments, i.e.,
\note{XXX and XXX, are utilized for XXX}.
%Regarding the experimental validation, it is conducted on a listener-speaker
%referential game framework with eliminated unnatural inductions.
Both the theoretical analysis and experimental results lead to a counter-intuitive
conclusion that \emph{lower agent capacity facilitates the emergence of symbolic language
with higher compositionality}. \note{Therefore, by only reducing the agent capacity
in such a natural environment, we
can generate a higher compositional symbolic language with a higher probability.}
%Prior studies focus on investigating how to affect the
......@@ -167,8 +167,9 @@ Both theoretical analysis and
In this paper, we made the following contributions:
\begin{itemize}[topsep=0pt,itemsep=0cm]
\item We are the first to successfully achieve high compositional symbolic
language naturally, without any deliberately handcrafted inductions.
\item To the best of our knowledge, we are the first work to successfully achieve
high compositional symbolic
language naturally, without any deliberately handcrafted induction.
\item We thoroughly analyze the compositionality of emerged symbolic language
after removing deliberately handcrafted inductions, and confirm that the agent
capacity acts as a key factor for compositionality.
......
......@@ -4,10 +4,11 @@
%external environmental factors
Previous works focus on the external environmental factors that impact the
compositionality of emerged symbolic language.
For example, XXX proposed small vocabulary sizes~\cite{}.
XXX proposed memoryless~\cite{}.
XXX proposed carefully constructed distractors~\cite{}.
XXX proposed ease-of-teaching~\cite{}.
For example, \citet{kirby2015compression} explored how the pressures for compressibility and expressivity lead to structured language.
\citet{kottur-etal-2017-natural} constrained the vocabulary size and whether the listener has memory to coax the compositionality of the emergent language.
\citet{lazaridou2018emergence} showed that the degree of structure found in the input data affects the emergence of the symbolic language.
\citet{li2019ease} studied how the pressure of ease of teaching impacts the iterated language in a population regime.
\citet{evtimova2018emergent} designed a novel multi-modal scenario, in which the speaker and the listener access different modalities of the input object, to explore the language emergence.
Such factors are deliberately designed, which are too ideal to be true in
the real world. None of these works realizes the importance of model capacity of
agent itself. \rmk{this should be largely emphasized.}
......
......@@ -24,35 +24,22 @@ In this paper, the task is xxxx.
\textbf{Game rules} In our referential game, agents follow the following rules
to finish the game in a cooperative manner. In each round, once receiving an
input object $t$, Speaker $S$ speaks a symbol sequence $s$ to Listener $L$;
Listener $L$ reconstruct the predict result $\hat{t}$ based on the listened
Listener $L$ reconstructs the predicted result $\hat{t}$ based on the listened
sequence $s$; if $t=\hat{t}$, agents win this game and receive positive rewards
($R(t,\hat{t})=1$); otherwise agents fail this game and receive negative rewards
($R(t,\hat{t})=-1$).
Precisely,
An input object t is a concept sequence with fixed length, denoted
$t=(c_0,c_1)$.
The concept $c_0(shape)$ and $c_1(color)$ are indicated as a
one-hot vector respectively.
The length of each one-hot vector ranges from 3 to 6.
These two vectors are concatenated to denote the input object t.
Each symbol sequence s contains two words, denoted $(s_0,s_1)$. Each word $s_i$
is chosen in the vocabulary set $V$. In this game, let the card $|V|$ range from
4 to 10, and the inequality $|V|^2\geq|M_0||M_1|$ is satisfied to ensure the
symbol sequence $(s_0,s_1)$ can be used to denote all the input object t. The
one-hot vector with the length $|V|$ is used to indicate the word $s_0$ and
$s_1$ respectively. Then, the two one-hot vectors are concatenated to denote the
symbol sequence s.
The predict result $\hat{t}$ is denoted as a one-hot vector with the length
$|M_0||M_1|$. Each bit of the one-hot vector denotes one input object. If the
predict result $\hat{t}[i*|M_1|+j]=1$, the one-hot vector of each predict
concept $\hat{c}_0$ and $\hat{c}_1$ respectively satisfied $\hat{c}_0[i]=1$ and
$\hat{c}_1[j]=1$.
If $(c_0,c_1)$ is equal to $(\hat{c}_0,\hat{c}_1)$, the input object and the
predict result indicate the same object.
Precisely, during the game, Speaker $S$ receives an input object $t$, which is
an expression with two words from the vocabulary set $V$, i.e., two
one-hot vector representing shape and color, respectively. Based on the $t$,
Speaker $S$ speaks a symbol sequence $s$, which similarly contains two words
from $V$. The Listener $L$ receives $s$ and outputs the predicted result $\hat{t}$,
a single word (one-hot vector) selected from the Cartesian product of two sets $V$
($V\times V$), which represents all the meanings of two combined words from $V$.
Please note that since $t$ and $\hat{t}$ have different lengths, we say
$t=\hat{t}$ if $t$ expresses the same meaning as $\hat{t}$, e.g.,
$t=([0,0,1],[0,1,0])$ would be equal to $\hat{t}=[0,0,0,0,0,1]$ if they both mean ``red
circle''.
......@@ -67,29 +54,91 @@ predict result indicate the same object.
\label{fig:agents}
\end{figure}
The agents apply their own policy to play the referential game. Denote the
policy of the speaker agent S and the listener L as $\pi_S$ and $\pi_L$. $\pi_S$
indicates the conditional probability $P(s_0|t)$ and $P(s_1|t)$. $\pi_L$
indicates the conditional probability $P(\hat{t}|s_0,s_1)$. The listener agent
output predict result $\hat{t}$ through random sampling on the conditional
probability $P(\hat{t}|s_0,s_1)$. The neural networks are used to simulate the
agent policy. The agent architecture is shown in Figure 1.
For the speaker, the input object t is firstly passed to a MLP to get a hidden
layer vector $h^S$. Then, the hidden layer vector is split into two feature
vectors $h_0^S$ and $h_1^S$ with length h\_size. Through a MLP and a softmax layer,
these feature vectors are transformed as the output $o_0$ and $o_1$ with the length
|V| respectively. Lastly, the symbol sequences $s_0$ and $s_1$ are sampled from the
output $o_0$ and $o_1$.
For the listener, the input symbol sequences $s_0$ and $s_1$ are passed into a MLP
respectively to get the hidden layer vectors $h_0$ and $h_1$. The length of each
vector is h\_size. Concatenating these vectors, and passing the conjunctive
vector into a MLP and a softmax layer, the output $o^L$ with length $|M_0||M_1|$
denotes $P(\hat{t}|s_0,s_1)$. Lastly, the predict result is sampled from the
output $o^L$.
In the experiments, the symbol h\_size is used to denote the model capacity of
the agents.
\subsection{Training algorithm}
Figure~\ref{fig:agents} shows the architecture of the constructed agents,
including the Speaker $S$ and Listener $L$.
\textbf{Speaker.} Regarding the Speaker $S$, it is constructed as a three-layer neural
network. The Speaker $S$ processes the input object $t$ with a fully-connected
layer to obtain the hidden layer $h^s$, which is split into two sub-layers. Each
sub-layer is further processed with fully-connected layers to obtain the output
layer. The output layer results indicate the probability distribution of symbols
with given input object $t$, i.e., $o_i^{s}=P(s_i|t)$, $i\in\{0,1\}$. \note{The final
readout symbols are sampled based on such probability distribution.}
\textbf{Listener.} Regarding the Listener $L$, it is constructed as a
three-layer neural network, too. Different from Speaker $S$, which splits the
hidden layer into two sub-layers, $L$ concatenates two sub-layers into one
output layer. The output layer results are also the probability distribution of
the predicted result $\hat{t}$ with given input sequence $s$, i.e., $o^{L}=P(\hat{t}|s_0,s_1)$.
\note{The final readout symbol is sampled based on the probability.}
\subsection{Learning algorithm}
\label{ssec:training}
To remove all the handcrafted inductions as well as for a more realistic
scenario, agents for this referential game are independent of each other,
without sharing model parameters or architectural connections. As shown in
Algorithm~\ref{al:learning}, we train the separate Speaker $S$ and Listener $L$ with
Stochastic Policy Gradient methodology in a tick-tock manner, i.e., training one
agent while fixing the other one. Roughly, when training the Speaker, the
target is set to maximize the expected reward
$J(\theta_S, \theta_L)=E_{\pi_S,\pi_L}[R(\hat{t}, t)]$ by adjusting the parameter
$\theta_S$, where $\theta_S$ is the neural network parameters of Speaker $S$
with learned output probability distribution $\pi_S$, and $\theta_L$ is the
neural network parameters of Listener with learned probability distribution $\pi_L$.
Similarly, when training the Listener, the target is set to maximize the
expected reward $J(\theta_S, \theta_L)$ by fixing the parameter $\theta_S$ and
adjusting the parameter $\theta_L$.
Additionally, to avoid the handcrafted induction on the emergent language, we only
use the predicted result $\hat{t}$ of the listener agent as the
evidence for whether to give the positive reward. Then, the gradients of the
expected reward $J(\theta_S, \theta_L)$ can be calculated as follows:
\begin{align}
\nabla_{\theta^S} J &= \mathbb{E}_{\pi^S, \pi^L} \left[ R(\hat{t}, t) \cdot
\nabla_{\theta^S} \log{\pi^S(s_0, s_1 | t)} \right] \\
\nabla_{\theta^L} J &= \mathbb{E}_{\pi^S, \pi^L} \left[ R(\hat{t}, t) \cdot
\nabla_{\theta^L} \log{\pi^L(\hat{t} | s_0, s_1)} \right]
\end{align}
\begin{algorithm}[t]
\caption{Learning Algorithm$(t,\hat{t})$}
\label{al:learning}
\small
\begin{algorithmic}[1]
\IF{Training the speaker agent S}
\FOR{Batch T randomly selected from $M_0\times M_1$}
\FOR{$t=(c_0,c_1)$ in T}
\STATE $P(s_0|t),P(s_1|t)=\pi_{old}^S(s=(s_0,s_1)|t)$
\STATE Sample $s_0$ with $P(s_0|t)$, $s_1$ with $P(s_1|t)$
\STATE $P(\hat{t}|s) = \pi^L(\hat{t}|s)$
\STATE Sample $\hat{t}$ with $P(\hat{t}|s)$
\STATE Get reward $R(\hat{t},t)$
\STATE $J(\theta^S,\theta^L)=E_{\pi_{old}^S,\pi^L}[R(\hat{t},t)\cdot\frac{\pi^S(s|t)}{\pi^S_{old}(s|t)}]$
\STATE Update $\theta^S$ by $\nabla_{\theta^S}J$
\ENDFOR
\STATE $\pi_{old}^S\leftarrow \pi^S$
\ENDFOR
\ENDIF
\IF{Training the listener agent L}
\FOR{Batch T randomly selected from $M_0\times M_1$}
\FOR{$t=(c_0,c_1)$ in T}
\STATE $P(s_0|t),P(s_1|t)=\pi^S(s=(s_0,s_1)|t)$
\STATE Sample $s_0$ with $P(s_0|t)$, $s_1$ with $P(s_1|t)$
\STATE $P(\hat{t}|s) = \pi^L_{old}(\hat{t}|s)$
\STATE Sample $\hat{t}$ with $P(\hat{t}|s)$
\STATE Get reward $R(\hat{t},t)$
\STATE $J(\theta^S,\theta^L)=E_{\pi^S,\pi_{old}^L}[R(\hat{t},t)\cdot\frac{\pi^L(\hat{t}|s)}{\pi^L_{old}(\hat{t}|s)}]$
\STATE Update $\theta^L$ by $\nabla_{\theta^L}J$
\ENDFOR
\STATE $\pi_{old}^L\leftarrow \pi^L$
\ENDFOR
\ENDIF
\end{algorithmic}
\end{algorithm}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment