//
// This file is part of MorphoGraphX - https://www.MorphoGraphX.org  (@RichardSmithLab)
//
// MorphoGraphX development is led by the Richard S. Smith lab at the John Innes Centre, Norwich, UK
//
// If you use MorphoGraphX in your work, please cite:
//   https://doi.org/10.7554/eLife.72601
//
// For support please see the image.sc forum:
//   https://forum.image.sc/tag/MorphoGraphX
//
// MorphoGraphX is copyright by its authors, contributors, and/or their employers.
//
// MorphoGraphX is free software, and is licensed under the terms of the 
// GNU General Public License https://www.gnu.org/licenses/.
//
#ifndef CACHED_ALLOC_HPP
#define CACHED_ALLOC_HPP

#include <Config.hpp>

#ifdef THRUST_BACKEND_CUDA

#include <cuda_runtime.h> // this header is not automatically included in Thrust headers in CUDA-9.0RC
#include <thrust/system/cuda/vector.h>
#include <thrust/system/cuda/execution_policy.h>
#include <thrust/host_vector.h>
#include <thrust/generate.h>
#include <thrust/pair.h>
#include <iostream>
#include <map>
#include <list>
 
// Modified for use with MorphoGraphX
//
// Example by Nathan Bell and Jared Hoberock
// (modified by Mihail Ivakhnenko)
//
// This example demonstrates how to intercept calls to get_temporary_buffer
// and return_temporary_buffer to control how Thrust allocates temporary storage
// during algorithms such as thrust::reduce. The idea will be to create a simple
// cache of allocations to search when temporary storage is requested. If a hit
// is found in the cache, we quickly return the cached allocation instead of
// resorting to the more expensive thrust::cuda::malloc.
//
// Note: this implementation of CachedAllocator is not thread-safe. If multiple
// (host) threads use the same CachedAllocator, each of them must gain exclusive
// access to the allocator before calling any of its methods.
 
namespace mgx 
{
  // CachedAllocator: a simple byte allocator that caches allocation requests.
  //
  // Only the interface is declared here; allocate(), deallocate() and clear()
  // are implemented outside this header.
  //
  // The destructor calls clear(), so an instance is responsible for whatever
  // clear() releases. Copying is therefore disabled: a copy would duplicate
  // the raw pointers held in the bookkeeping containers, and both destructors
  // would then try to release the same blocks.
  class CachedAllocator
  {
  public:
    // just allocate bytes
    typedef char value_type;
   
    CachedAllocator() : maxBuffers(10) {}
    ~CachedAllocator() { clear(); }

    // Non-copyable (see class comment).
    CachedAllocator(const CachedAllocator&) = delete;
    CachedAllocator& operator=(const CachedAllocator&) = delete;
   
    // Request a block of n bytes; returns a pointer to it.
    char* allocate(size_t n);
    // Hand back a block of n bytes previously obtained from allocate().
    void deallocate(char* ptr, size_t n);
    // Also invoked by the destructor.
    // NOTE(review): presumably frees every cached and outstanding block --
    // confirm against the implementation.
    void clear();
   
  private:
    // (size in bytes, pointer) -- NOTE(review): field meaning inferred from the
    // typedef names; confirm against the implementation.
    typedef std::pair<size_t, char*> FreeBlockPair;
    typedef std::list<FreeBlockPair> FreeBlockList;
    // pointer -> size of the block it refers to (same caveat as above)
    typedef std::map<char*, size_t> AllocatedBlockMap;
   
    // Initialised to 10 by the constructor.
    // NOTE(review): presumably the cap on the number of cached free blocks --
    // confirm against the implementation.
    unsigned int maxBuffers;
    FreeBlockList freeBlocks;
    AllocatedBlockMap allocatedBlocks;
  };

  // Declaration only: the cachedAlloc object itself is defined elsewhere (not
  // in this header). Every file that includes this header refers to that one
  // instance.
  extern CachedAllocator cachedAlloc;
}
#endif
#endif
