Source code for rl.memory

from __future__ import absolute_import
from collections import namedtuple
from rl.utils.memory import RingBuffer, sample_batch_indexes
import numpy as np
import pickle

# One transition of the environment: taking `action` in `state0` yields
# `reward` and leads to `state1`; `terminal1` records whether `state1` is
# terminal.
Experience = namedtuple(
    'Experience',
    ['state0', 'action', 'reward', 'state1', 'terminal1'],
)

# A batch of transitions.
# Data is laid out field by field (element-wise) rather than as a sequence of
# individual experiences.
Batch = namedtuple(
    "Batch",
    "state0 action reward state1 terminal1",
)


class Memory(object):
    """Abstract memory class.

    Defines the interface shared by experience memories. Both :meth:`sample`
    and :meth:`append` raise :class:`NotImplementedError` here and have to be
    overridden by concrete subclasses.

    :param env: the environment; stored unchanged as ``self.env``
    """

    def __init__(self, env):
        self.env = env

    def sample(self, batch_size):
        """Get a sample from the memory.

        :param int batch_size: size of the batch
        :return: A :class:`Batch` object
        :raises NotImplementedError: always, in this abstract base class
        """
        raise NotImplementedError()

    def append(self, experience):
        """Add the experience to the memory.

        :param experience: the experience to store
        :raises NotImplementedError: always, in this abstract base class
        """
        raise NotImplementedError()
class SimpleMemory(Memory):
    """A simple memory directly storing experiences in a circular buffer.

    Data is stored directly as an array of :class:`Experience`.

    :param env: the environment; ``env.observation_space.dim`` and
        ``env.action_space.dim`` give the row widths of the arrays built by
        :meth:`get_idxs`
    :param limit: forwarded to :class:`RingBuffer` (presumably the maximum
        number of stored experiences -- confirm against ``rl.utils.memory``)
    """

    def __init__(self, env, limit):
        super(SimpleMemory, self).__init__(env)
        self.buffer = RingBuffer(limit)

    def get_idxs(self, idxs, batch_size=None):
        """Get a non-contiguous series of indexes.

        :param idxs: iterable of indexes into the underlying buffer
        :param int batch_size: expected number of indexes; defaults to the
            number of entries in ``idxs``
        :return: A :class:`Batch` whose fields are arrays with one row per
            index, in the order given by ``idxs``
        :raises ValueError: if ``batch_size`` differs from the number of
            indexes supplied
        """
        idxs = list(idxs)
        if batch_size is None:
            batch_size = len(idxs)
        elif len(idxs) != batch_size:
            # The arrays below come from np.empty, so any row not written by
            # the loop would hold uninitialised values. Refuse instead of
            # returning garbage.
            raise ValueError(
                "Expected {} indexes but got {}".format(batch_size, len(idxs)))

        observation_dim = self.env.observation_space.dim
        action_dim = self.env.action_space.dim

        # Allocate memory
        state0_batch = np.empty((batch_size, observation_dim))
        action_batch = np.empty((batch_size, action_dim))
        reward_batch = np.empty((batch_size, 1))
        terminal1_batch = np.empty((batch_size, 1), dtype=bool)
        state1_batch = np.empty((batch_size, observation_dim))

        for batch_index, memory_index in enumerate(idxs):
            experience = self.buffer[memory_index]
            state0_batch[batch_index, :] = experience.state0
            action_batch[batch_index, :] = experience.action
            reward_batch[batch_index, :] = experience.reward
            terminal1_batch[batch_index, :] = experience.terminal1
            state1_batch[batch_index, :] = experience.state1

        return Batch(
            state0=state0_batch,
            action=action_batch,
            reward=reward_batch,
            terminal1=terminal1_batch,
            state1=state1_batch)

    def sample(self, batch_size, batch_idxs=None):
        """Get a sample from the memory.

        :param int batch_size: size of the batch
        :param batch_idxs: optional indexes to use instead of drawing random
            ones
        :return: A :class:`Batch` object
        :raises IndexError: if the memory holds fewer than ``batch_size``
            experiences
        """
        available_samples = len(self)
        if batch_size > available_samples:
            raise IndexError(
                "Not enough elements in the memory (currently {}) to sample "
                "a batch of size {}".format(available_samples, batch_size))

        if batch_idxs is None:
            # Draw random indexes such that we have at least a single entry
            # before each index.
            batch_idxs = sample_batch_indexes(
                0, available_samples - 1, size=batch_size)
        # NOTE(review): the +1 shift is applied to caller-supplied indexes as
        # well. The statement's nesting could not be recovered with certainty
        # from the collapsed source, so confirm against the upstream file.
        # Because of the shift, randomly drawn indexes presumably never
        # select buffer entry 0.
        batch_idxs = np.array(batch_idxs) + 1
        return self.get_idxs(batch_idxs, batch_size=batch_size)

    def append(self, experience):
        """Add the experience to the memory."""
        self.buffer.append(experience)

    @classmethod
    def from_file(cls, env, limit, file_path):
        """Create a memory from a pickle file.

        :param env: the environment handed to the new memory
        :param limit: buffer limit handed to the new memory
        :param file_path: path of the pickle file to read
        :return: a new instance of ``cls`` holding the loaded experiences
        """
        # SECURITY: pickle.load can execute arbitrary code while
        # deserialising. Only load files from trusted sources.
        with open(file_path, "rb") as fd:
            memory_database = pickle.load(fd)

        memory = cls(limit=limit, env=env)
        for experience in memory_database:
            memory.append(Experience(*experience))
        return memory

    def save(self, file):
        """Dump the memory into a pickle file.

        :param file: path of the file to write
        """
        print("Saving memory")
        with open(file, "wb") as fd:
            pickle.dump(self.buffer.dump(), fd)

    def dump(self):
        """Get the memory content as a single array."""
        return self.buffer.dump()

    def __len__(self):
        return len(self.buffer)