// Basic format
//
// Each publication entry is an array of eight positional fields, in the
// order shown below. Keep one field per line: a `//` comment extends to the
// end of its line, so any field written after it on the same line is ignored.

[
    "Authors", // use ", " or " and " between authors
    "Title",
    "Venue",
    "Venue short name", // leave empty for arXiv-only
    "Abstract", // don't include latex commands here
    'Bibtex', // note: use ' not " here; for arXiv-only can use Bibtex from the paper's arXiv page
    [ "arXiv" => "URL", "Code" => "URL", ... ],
    "tag1,tag2,tag3" // tags all lower-case, no spaces; venue short name is automatically added; only use tags from the below tag list
],


// Tag list
//
// The keys are the short tags written in the last field of an entry
// (e.g. "sur,sec"); the values are the corresponding full tag names.

$tags = array(
    "sur" => "survey",
    "drl" => "deep-rl",
    "marl" => "multi-agent-rl",
    "am" => "agent-modelling",
    "aht" => "ad-hoc-teamwork",
    "av" => "autonomous-driving",
    "gr" => "goal-recognition",
    "xai" => "explainable-ai",
    "cau" => "causal",
    "sec" => "security",
    "ecom" => "emergent-communication",
    "inrew" => "intrinsic-reward",
    "sim" => "simulator",
    "est" => "state-estimation"
);


// Example: arXiv-only, not yet published
// (the venue field holds the arXiv identifier and the venue short name is empty)

[
    "Maciej Wiatrak, Stefano V. Albrecht, Andrew Nystrom",
    "Stabilizing Generative Adversarial Networks: A Survey",
    "arXiv:1910.00927",
    "",
    "Generative Adversarial Networks (GANs) are a type of generative model which have received much attention due to their ability to model complex real-world data. Despite their recent successes, the process of training GANs remains challenging, suffering from instability problems such as non-convergence, vanishing or exploding gradients, and mode collapse. In recent years, a diverse set of approaches have been proposed which focus on stabilizing the GAN training procedure. The purpose of this survey is to provide a comprehensive overview of the GAN training stabilization methods which can be found in the literature. We discuss the advantages and disadvantages of each approach, offer a comparative summary, and conclude with a discussion of open problems.",
    '@misc{wiatrak2019stabilizing, title={Stabilizing Generative Adversarial Networks: A Survey}, author={Maciej Wiatrak and Stefano V. Albrecht and Andrew Nystrom}, year={2019}, eprint={1910.00927}, archivePrefix={arXiv}, primaryClass={cs.LG} }',
    [ "arXiv" => "https://arxiv.org/abs/1910.00927" ],
    "sur,sec"
],


// Example: workshop

[
    "Lukas Schäfer and Filippos Christianos and Josiah Hanna and Stefano V Albrecht",
    "Decoupling Exploration and Exploitation in Reinforcement Learning",
    "ICML Workshop on Unsupervised Reinforcement Learning",
    "ICML",
    "Intrinsic rewards are commonly applied to improve exploration in reinforcement learning. However, these approaches suffer from instability caused by non-stationary reward shaping and strong dependency on hyperparameters. In this work, we propose Decoupled RL (DeRL) which trains separate policies for exploration and exploitation. DeRL can be applied with on-policy and off-policy RL algorithms. We evaluate DeRL algorithms in two sparse-reward environments with multiple types of intrinsic rewards. We show that DeRL is more robust to scaling and speed of decay of intrinsic rewards and converges to the same evaluation returns than intrinsically motivated baselines in fewer interactions.",
    '@inproceedings{schaefer2021decoupling, title={Decoupling Exploration and Exploitation in Reinforcement Learning}, author={Lukas Schäfer and Filippos Christianos and Josiah Hanna and Stefano V. Albrecht}, booktitle={ICML Workshop on Unsupervised Reinforcement Learning (URL)}, year={2021} }',
    [ "arXiv" => "https://arxiv.org/abs/2006.07169", "Code" => "https://github.com/uoe-agents/derl" ],
    "drl,inrew"
],


// Example: conference

[
    "Arrasy Rahman and Niklas Höpner and Filippos Christianos and Stefano V. Albrecht",
    "Towards Open Ad Hoc Teamwork Using Graph-based Policy Learning",
    "International Conference on Machine Learning",
    "ICML",
    "Ad hoc teamwork is the challenging problem of designing an autonomous agent which can adapt quickly to collaborate with teammates without prior coordination mechanisms, including joint training. Prior work in this area has focused on closed teams in which the number of agents is fixed. In this work, we consider open teams by allowing agents with different fixed policies to enter and leave the environment without prior notification. Our solution builds on graph neural networks to learn agent models and joint-action value models under varying team compositions. We contribute a novel action-value computation that integrates the agent model and joint-action value model to produce action-value estimates. We empirically demonstrate that our approach successfully models the effects other agents have on the learner, leading to policies that robustly adapt to dynamic team compositions and significantly outperform several alternative methods.",
    '@inproceedings{rahman2021open, title={Towards Open Ad Hoc Teamwork Using Graph-based Policy Learning}, author={Arrasy Rahman and Niklas H\"opner and Filippos Christianos and Stefano V. Albrecht}, booktitle={International Conference on Machine Learning (ICML)}, year={2021} }',
    [ "arXiv" => "https://arxiv.org/abs/2006.10412", "Code" => "https://github.com/uoe-agents/GPL" ],
    "drl,am,aht"
],


// Example: journal

[
    "Stefano V. Albrecht, Peter Stone",
    "Autonomous Agents Modelling Other Agents: A Comprehensive Survey and Open Problems",
    "Artificial Intelligence",
    "AIJ",
    "Much research in artificial intelligence is concerned with the development of autonomous agents that can interact effectively with other agents. An important aspect of such agents is the ability to reason about the behaviours of other agents, by constructing models which make predictions about various properties of interest (such as actions, goals, beliefs) of the modelled agents. A variety of modelling approaches now exist which vary widely in their methodology and underlying assumptions, catering to the needs of the different sub-communities within which they were developed and reflecting the different practical uses for which they are intended. The purpose of the present article is to provide a comprehensive survey of the salient modelling methods which can be found in the literature. The article concludes with a discussion of open problems which may form the basis for fruitful future research.",
    '@article{ albrecht2018modelling, title = {Autonomous Agents Modelling Other Agents: A Comprehensive Survey and Open Problems}, author = {Stefano V. Albrecht and Peter Stone}, journal = {Artificial Intelligence}, volume = {258}, pages = {66--95}, year = {2018}, publisher = {Elsevier}, note = {DOI: 10.1016/j.artint.2018.01.002} }',
    [ "arXiv" => "https://arxiv.org/abs/1709.08071", "Publisher" => "https://www.sciencedirect.com/science/article/pii/S0004370218300249" ],
    "am,gr"
],