Merge branch 'master' of http://62.234.201.16/hao/AAAI21_Emergent_language

15ceebed · YZhao · f4b2c3ae · ec4ea380 · 15ceebed · 15ceebed
Commit 15ceebed authored Sep 10, 2020 by YZhao
Show whitespace changes
Inline Side-by-side

Showing with 32 additions and 14 deletions

AAAI2021/ref.bib
+18 -0

AAAI2021/tex/introduction.tex
+2 -3

AAAI2021/tex/relatedwork.tex
+9 -7

AAAI2021/tex/theory.tex
+3 -4

No files found.
--- a/AAAI2021/ref.bib
+++ b/AAAI2021/ref.bib
@@ -30,6 +30,24 @@
    abstract = "A number of recent works have proposed techniques for end-to-end learning of communication protocols among cooperative multi-agent populations, and have simultaneously found the emergence of grounded human-interpretable language in the protocols developed by the agents, learned without any human supervision! In this paper, using a Task {\&} Talk reference game between two agents as a testbed, we present a sequence of {`}negative{'} results culminating in a {`}positive{'} one {--} showing that while most agent-invented languages are effective (i.e. achieve near-perfect task rewards), they are decidedly not interpretable or compositional. In essence, we find that natural language does not emerge {`}naturally{'},despite the semblance of ease of natural-language-emergence that one may gather from recent literature. We discuss how it is possible to coax the invented languages to become more and more human-like and compositional by increasing restrictions on how two agents may communicate.",
 }

+@inproceedings{chaabouni-etal-2019-word,
+    title     = {Word-order Biases in Deep-agent Emergent Communication},
+    author    = {Chaabouni, Rahma and
+                 Kharitonov, Eugene and
+                 Lazaric, Alessandro and
+                 Dupoux, Emmanuel and
+                 Baroni, Marco},
+    booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
+    month     = jul,
+    year      = {2019},
+    address   = {Florence, Italy},
+    publisher = {Association for Computational Linguistics},
+    url       = {https://www.aclweb.org/anthology/P19-1509},
+    doi       = {10.18653/v1/P19-1509},
+    pages     = {5166--5175},
+    abstract  = {Sequence-processing neural networks led to remarkable progress on many NLP tasks. As a consequence, there has been increasing interest in understanding to what extent they process language as humans do. We aim here to uncover which biases such models display with respect to {``}natural{''} word-order constraints. We train models to communicate about paths in a simple gridworld, using miniature languages that reflect or violate various natural language trends, such as the tendency to avoid redundancy or to minimize long-distance dependencies. We study how the controlled characteristics of our miniature languages affect individual learning and their stability across multiple network generations. The results draw a mixed picture. On the one hand, neural networks show a strong tendency to avoid long-distance dependencies. On the other hand, there is no clear preference for the efficient, non-redundant encoding of information that is widely attested in natural language. We thus suggest inoculating a notion of {``}effort{''} into neural networks, as a possible way to make their linguistic behavior more human-like.},
+}
+
 @article{kirby2015compression,
  title={Compression and communication in the cultural evolution of linguistic structure},
  author={Kirby, Simon and Tamariz, Monica and Cornish, Hannah and Smith, Kenny},

--- a/AAAI2021/tex/introduction.tex
+++ b/AAAI2021/tex/introduction.tex
@@ -51,8 +51,7 @@ vocabulary can express almost infinite concepts.}
 Prior studies focus on achieving high compositional symbolic language 
 through \emph{deliberately handcrafted} inductions, e.g., small vocabulary
 sizes~\cite{}, memoryless~\cite{}, additional rewards~\cite{}, constructed loss functions~\cite{}, and
-ease-of-teaching~\cite{}. \note{The possible intuition is that high compositional symbolic
-language cannot emerge without induction in existing multi-agent environment.}
+ease-of-teaching~\cite{}. \note{Such optimization methodologies are motivated by the difficulty of generating high compositional symbolic language without induction in existing multi-agent environments.}
 Figure~\ref{fig:induction} reports the compositionality when training two agents
 in the widely-used listener-speaker referential game for emerging 100 symbolic
 languages, and it can be observed that \note{the compositionality
@@ -65,7 +64,7 @@ Yet, few works investigate the emergence of high compositional symbolic language
 In other words, it is never clear whether \emph{natural}
 environment and agents are sufficient for achieving high compositionality. 

-In this paper, we are the first work to achieve high compositional
+This paper is the first one to achieve high compositional
 symbolic language without any deliberately handcrafted induction. The key observation
 is that the internal \emph{agent capacity} plays a crucial role in the
 compositionality of symbolic language.

--- a/AAAI2021/tex/relatedwork.tex
+++ b/AAAI2021/tex/relatedwork.tex
@@ -4,14 +4,14 @@
 %external environmental factors
 Previous works focus on the external environmental factors that impact the
 compositionality of emerged symbolic language. 
+Some significant works studying the effect of external environmental factors on the compositionality of emergent language are summarized in Table~\ref{tab:rel}.
 For example,~\citet{kirby2015compression} explored how the pressures for expressivity and compressibility lead to structured language.
 ~\citet{kottur-etal-2017-natural} constrained the vocabulary size and whether the listener has memory to coax the compositionality of the emergent language.
 ~\citet{lazaridou2018emergence} showed that the degree of structure found in the input data affects the emergence of the symbolic language.
 ~\citet{li2019ease} studied how the pressure of ease of teaching impacts the iterative language of the population regime.
 ~\citet{evtimova2018emergent} designed a novel multi-modal scenario, in which the speaker and the listener have access to different modalities of the input object, to explore language emergence.
-Such factors are deliberately designed, which are too ideal to be true in
-the real world. None of these works realizes the importance of model capacity of
-agent itself. \rmk{this should be largely emphasized.}
+Such factors are deliberately designed, which are too ideal to be true in the real world. 
+In this paper, all of the handcrafted inductions above are removed, and the high compositional language emerges solely from the agent capacity.  \rmk{this should be largely emphasized.}


 \begin{table*}[htbp]
@@ -24,11 +24,12 @@ agent itself. \rmk{this should be largely emphasized.}
    Works & Handcrafted induction & Compositionality\\ 
    \midrule
    \cite{kirby2015compression}&Expressivity and compressibility&Qualitative, Speaker\\
-    \cite{kottur-etal-2017-natural}&Vocabulary size, listener's memory&Qualitative, Speaker\\
-    \cite{choi2018compositional}&Vocabulary size, maximum message length&Qualitative, Speaker+Listener\\
+    \cite{kottur-etal-2017-natural}&Listener's memory&Qualitative, Speaker\\
+    \cite{choi2018compositional}&Maximum message length&Qualitative, Speaker+Listener\\
    \cite{lazaridou2018emergence}&Structure of input data&Quantitative, Speaker\\
    \cite{evtimova2018emergent}&Multi-modal scenarios&Quantitative, Speaker\\
    \cite{li2019ease}&Population size, resetting all listeners&Quantitative, Speaker\\
+    \cite{chaabouni-etal-2019-word}&Word-order constraints&Qualitative, Speaker\\
    \cite{chaabouni2020compositionality}&Easier to decode&Quantitative, Speaker\\
    \textbf{Ours} & \textbf{None} & \textbf{Quantitative, Speaker+Listener} \\
    \bottomrule
@@ -37,7 +38,7 @@ agent itself. \rmk{this should be largely emphasized.}

 %measure
 To measure the compositionality of emerged symbolic language, many metrics are
-proposed~\cite{}. 
+proposed~\cite{kottur-etal-2017-natural,choi2018compositional,lazaridou2018emergence,evtimova2018emergent,chaabouni2020compositionality}. 
 %Widely accepted metrics can be classified into two categories, measuring
 %positive signaling~\cite{} and measuring positive listening~\cite{}. The former
 %metrics measure the relationship between spoken symbols and received concepts
@@ -56,7 +57,8 @@ For example, ~\citet{choi2018compositional} printed the agent messages with the 
 ~\citet{kottur-etal-2017-natural} introduced the dialog tree to show the evolution of language compositionality during the training process.
 Later, some quantitative metrics were explored.
 The topographic similarity~\cite{lazaridou2018emergence} is introduced to measure the distances between all the possible pairs of meanings and the corresponding pairs of signals.
-\citet{chaabouni2020compositionality} proposed the positional disentanglement and the bag-of-symbols disentanglement. The positional disentanglement measures whether symbols in specific postion clearly relate to the specific attribute of the input object. The bag-of-symbols measure the permutation-invariant characteristic of a language.
+\citet{chaabouni2020compositionality} proposed the positional disentanglement, which measures whether symbols in a specific position clearly relate to a specific attribute of the input object. 
+As Table~\ref{tab:rel} shows, most metrics are proposed from the perspective of the speaker. In our view, human beings developed language based on both the speaker and the listener. Only the work of~\cite{choi2018compositional} in Table~\ref{tab:rel} qualitatively considered the perspectives of both the speaker and the listener. In this paper, we propose a novel quantitative metric from both the speaker's and the listener's perspectives.




--- a/AAAI2021/tex/theory.tex
+++ b/AAAI2021/tex/theory.tex
-\section{Experimental Setup}
+\section{ Symbolic Language Producing }
 \label{sec:thory}

-In this section, we introduce the experimental setup used in this paper,
-including the environment setup, agent architecture, and training algorithm.
+Before going into the details of the training algorithms, we first introduce the environment, gaming rules, and agent architecture for enabling the emergence of symbolic language. 


 \begin{figure}[t]
@@ -79,7 +78,7 @@ symbols $\hat{t}$ with given input sequence $s$, i.e, $o^{L}=P(\hat{t}|s_0,s_1)$

 To remove all the handcrafted induction as well as for a more realistic
 scenario, agents for this referential game are independent of each other,
-without sharing model parameters or architectural connections. As shown in
+with no shared model parameters or architectural connections. As shown in
 Algorithm~\ref{al:learning}, we train the separate Speaker $S$ and Listener $L$ with
 Stochastic Policy Gradient methodology in a tick-tock manner, i.e., training one
 agent while keeping the other one. Roughly, when training the Speaker, the