abstract = "A number of recent works have proposed techniques for end-to-end learning of communication protocols among cooperative multi-agent populations, and have simultaneously found the emergence of grounded human-interpretable language in the protocols developed by the agents, learned without any human supervision! In this paper, using a Task {\&} Talk reference game between two agents as a testbed, we present a sequence of {`}negative{'} results culminating in a {`}positive{'} one {--} showing that while most agent-invented languages are effective (i.e. achieve near-perfect task rewards), they are decidedly not interpretable or compositional. In essence, we find that natural language does not emerge {`}naturally{'},despite the semblance of ease of natural-language-emergence that one may gather from recent literature. We discuss how it is possible to coax the invented languages to become more and more human-like and compositional by increasing restrictions on how two agents may communicate.",
abstract = "A number of recent works have proposed techniques for end-to-end learning of communication protocols among cooperative multi-agent populations, and have simultaneously found the emergence of grounded human-interpretable language in the protocols developed by the agents, learned without any human supervision! In this paper, using a Task {\&} Talk reference game between two agents as a testbed, we present a sequence of {`}negative{'} results culminating in a {`}positive{'} one {--} showing that while most agent-invented languages are effective (i.e. achieve near-perfect task rewards), they are decidedly not interpretable or compositional. In essence, we find that natural language does not emerge {`}naturally{'},despite the semblance of ease of natural-language-emergence that one may gather from recent literature. We discuss how it is possible to coax the invented languages to become more and more human-like and compositional by increasing restrictions on how two agents may communicate.",
}
}
% Chaabouni et al., ACL 2019 (57th Annual Meeting): word-order biases in emergent communication.
@inproceedings{chaabouni-etal-2019-word,
  title     = {Word-order Biases in Deep-agent Emergent Communication},
  author    = {Chaabouni, Rahma and Kharitonov, Eugene and Lazaric, Alessandro and Dupoux, Emmanuel and Baroni, Marco},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  month     = jul,
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  abstract  = {Sequence-processing neural networks led to remarkable progress on many NLP tasks. As a consequence, there has been increasing interest in understanding to what extent they process language as humans do. We aim here to uncover which biases such models display with respect to {``}natural{''} word-order constraints. We train models to communicate about paths in a simple gridworld, using miniature languages that reflect or violate various natural language trends, such as the tendency to avoid redundancy or to minimize long-distance dependencies. We study how the controlled characteristics of our miniature languages affect individual learning and their stability across multiple network generations. The results draw a mixed picture. On the one hand, neural networks show a strong tendency to avoid long-distance dependencies. On the other hand, there is no clear preference for the efficient, non-redundant encoding of information that is widely attested in natural language. We thus suggest inoculating a notion of {``}effort{''} into neural networks, as a possible way to make their linguistic behavior more human-like.},
}
@article{kirby2015compression,
@article{kirby2015compression,
title={Compression and communication in the cultural evolution of linguistic structure},
title={Compression and communication in the cultural evolution of linguistic structure},
author={Kirby, Simon and Tamariz, Monica and Cornish, Hannah and Smith, Kenny},
author={Kirby, Simon and Tamariz, Monica and Cornish, Hannah and Smith, Kenny},
\caption{The distribution of compositionality when training for 100 symbolic
\caption{The distribution of compositionality when training for 100 symbolic
languages without
languages without
any induction. It can be observed that high compositional symbolic language
any induction. It can be observed that high compositional symbolic language
...
@@ -51,8 +51,7 @@ vocabulary can express almost infinite concepts.}
...
@@ -51,8 +51,7 @@ vocabulary can express almost infinite concepts.}
Prior studies focus on achieving high compositional symbolic language
Prior studies focus on achieving high compositional symbolic language
through \emph{deliberately handcrafted} inductions, e.g., small vocabulary
through \emph{deliberately handcrafted} inductions, e.g., small vocabulary
sizes~\cite{}, memoryless~\cite{}, additional rewards~\cite{}, constructed loss functions~\cite{}, and
sizes~\cite{}, memoryless~\cite{}, additional rewards~\cite{}, constructed loss functions~\cite{}, and
ease-of-teaching~\cite{}. \note{The possible intuition is that high compositional symbolic
ease-of-teaching~\cite{}. \note{Such optimization methodologies are driven by the challenge of generating high compositional symbolic language without induction in existing multi-agent environments.}
language cannot emerge without induction in existing multi-agent environment.}
Figure~\ref{fig:induction} reports the compositionality when training two agents
Figure~\ref{fig:induction} reports the compositionality when training two agents
in the widely-used listener-speaker referential game for emerging 100 symbolic
in the widely-used listener-speaker referential game for emerging 100 symbolic
languages, and it can be observed that \note{the compositionality
languages, and it can be observed that \note{the compositionality
...
@@ -65,7 +64,7 @@ Yet, few works investigate the emergence of high compositional symbolic language
...
@@ -65,7 +64,7 @@ Yet, few works investigate the emergence of high compositional symbolic language
In other words, it is never clear whether \emph{natural}
In other words, it is never clear whether \emph{natural}
environment and agents are sufficient for achieving high compositionality.
environment and agents are sufficient for achieving high compositionality.
In this paper, we are the first work to achieve high compositional
This paper is the first one to achieve high compositional
symbolic language without any deliberately handcrafted induction. The key observation
symbolic language without any deliberately handcrafted induction. The key observation
is that the internal \emph{agent capacity} plays a crucial role in the
is that the internal \emph{agent capacity} plays a crucial role in the
%Widely accepted metrics can be classified into two categories, measuring
%Widely accepted metrics can be classified into two categories, measuring
%positive signaling~\cite{} and measuring positive listening~\cite{}. The former
%positive signaling~\cite{} and measuring positive listening~\cite{}. The former
%metrics measure the relationship between spoken symbols and received concepts
%metrics measure the relationship between spoken symbols and received concepts
...
@@ -59,7 +60,8 @@ For example, ~\citet{choi2018compositional} printed the agent messages with the
...
@@ -59,7 +60,8 @@ For example, ~\citet{choi2018compositional} printed the agent messages with the
~\citet{kottur-etal-2017-natural} introduced the dialog tree to show the evolution of language compositionality during the training process.
~\citet{kottur-etal-2017-natural} introduced the dialog tree to show the evolution of language compositionality during the training process.
Later, some quantitative metrics were explored.
Later, some quantitative metrics were explored.
The topographic similarity~\cite{lazaridou2018emergence} is introduced to measure the distances between all the possible pairs of meanings and the corresponding pairs of signals.
The topographic similarity~\cite{lazaridou2018emergence} is introduced to measure the distances between all the possible pairs of meanings and the corresponding pairs of signals.
\citet{chaabouni2020compositionality} proposed the positional disentanglement and the bag-of-symbols disentanglement. The positional disentanglement measures whether symbols in a specific position clearly relate to a specific attribute of the input object. The bag-of-symbols disentanglement measures the permutation-invariant characteristic of a language.
\citet{chaabouni2020compositionality} proposed the positional disentanglement, which measures whether symbols in a specific position clearly relate to a specific attribute of the input object.
As shown in Table~\ref{tab:rel}, most metrics are proposed from the perspective of the speaker. In our view, human beings developed language based on both the speaker and the listener. Only one study in Table~\ref{tab:rel}, \citet{choi2018compositional}, qualitatively considers the perspectives of both the speaker and the listener. In this paper, we propose a novel quantitative metric that takes both the speaker's and the listener's perspectives into account.
In this section, we introduce the experimental setup used in this paper,
Before going to the detail of the training algorithms, we first introduce the environment, gaming rules, and agent architecture for enabling the emergence of symbolic language.
including the environment setup, agent architecture, and training algorithm.
MIS is a bilateral metric. Unilateral metrics, e.g. \emph{topographic similarity (topo)}\cite{} and \emph{posdis}\cite{}, only take the policy of the speaker into consideration. We provide an example to illustrate the inadequacy of unilateral metrics, shown in Figure~\ref{fig:unilateral}. In this example, the speaker only uses $s_1$ to represent shape. From the perspective of the speaker, the language is perfectly compositional (i.e. both topo and posdis are 1). However, the listener cannot distinguish the shape depending only on $s_1$, showing the non-compositionality of this language. The bilateral metric MIS addresses this defect by taking the policy of the listener into account, thus $MIS < 1$.
MIS is a bilateral metric. Unilateral metrics, e.g. \emph{topographic similarity (topo)}\cite{} and \emph{posdis}\cite{}, only take the policy of the speaker into consideration. We provide an example to illustrate the inadequacy of unilateral metrics, shown in Figure~\ref{fig:unilateral}. In this example, the speaker only uses $s_1$ to represent shape. From the perspective of the speaker, the language is perfectly compositional (i.e. both topo and posdis are 1). However, the listener cannot distinguish the shape depending only on $s_1$, showing the non-compositionality of this language. The bilateral metric MIS addresses this defect by taking the policy of the listener into account, thus $\mathit{MIS} < 1$.