from __future__ import print_function
import hashlib
import os
import shutil
import tempfile

import filelock
from six.moves.urllib import request

# Process-wide dataset root: the CHAINER_DATASET_ROOT environment variable
# when it is set, otherwise ~/.chainer/dataset under the user's home directory.
_dataset_root = os.environ.get(
    'CHAINER_DATASET_ROOT',
    os.path.expanduser('~/.chainer/dataset'))

def get_dataset_root():
    """Gets the path to the root directory to download and cache datasets.

    Returns:
        str: The path to the dataset root directory.

    """
    return _dataset_root

def set_dataset_root(path):
    """Sets the root directory to download and cache datasets.

    There are two ways to set the dataset root directory. One is by setting the
    environment variable ``CHAINER_DATASET_ROOT``. The other is by using this
    function. If both are specified, one specified via this function is used.
    The default dataset root is ``$HOME/.chainer/dataset``.

    Args:
        path (str): Path to the new dataset root directory.

    """
    # The root is module-level state shared by every helper in this file.
    global _dataset_root
    _dataset_root = path

def get_dataset_directory(dataset_name, create_directory=True):
    """Gets the path to the directory of given dataset.

    The generated path is just a concatenation of the global root directory
    (see :func:`set_dataset_root` for how to change it) and the dataset name.
    The dataset name can contain slashes, which are treated as path separators.

    Args:
        dataset_name (str): Name of the dataset.
        create_directory (bool): If True (default), this function also creates
            the directory at the first time. If the directory already exists,
            then this option is ignored.

    Returns:
        str: Path to the dataset directory.

    Raises:
        OSError: If ``create_directory`` is True and the directory neither
            exists nor can be created.

    """
    path = os.path.join(_dataset_root, dataset_name)
    if create_directory:
        try:
            os.makedirs(path)
        except OSError:
            # An already-existing directory is the expected case; any other
            # failure (permissions, a regular file in the way, ...) must not
            # be hidden from the caller.
            if not os.path.isdir(path):
                raise
    return path

def cached_download(url):
    """Downloads a file and caches it.

    It downloads a file from the URL if there is no corresponding cache. After
    the download, this function stores a cache to the directory under the
    dataset root (see :func:`set_dataset_root`). If there is already a cache
    for the given URL, it just returns the path to the cache without
    downloading it again.

    Args:
        url (str): URL to download from.

    Returns:
        str: Path to the cached file.

    Raises:
        RuntimeError: If the download cache directory does not exist and
            cannot be created.

    """
    cache_root = os.path.join(_dataset_root, '_dl_cache')
    try:
        os.makedirs(cache_root)
    except OSError:
        # makedirs also fails when the directory is already there; only a
        # cache root that is really missing is an error.
        # NOTE(review): the statement under this condition was absent from
        # the reviewed text; restored as RuntimeError -- confirm against the
        # upstream module.
        if not os.path.exists(cache_root):
            raise RuntimeError('cannot create download cache directory')

    lock_path = os.path.join(cache_root, '_dl_lock')
    # The cache file name is the MD5 hex digest of the URL.
    urlhash = hashlib.md5(url.encode('utf-8')).hexdigest()
    cache_path = os.path.join(cache_root, urlhash)

    with filelock.FileLock(lock_path):
        if os.path.exists(cache_path):
            return cache_path

    # Download into a private temporary directory without holding the lock,
    # then take the lock only for the move into the cache.
    temp_root = tempfile.mkdtemp(dir=cache_root)
    try:
        temp_path = os.path.join(temp_root, 'dl')
        request.urlretrieve(url, temp_path)
        with filelock.FileLock(lock_path):
            shutil.move(temp_path, cache_path)
    finally:
        shutil.rmtree(temp_root)

    return cache_path

def cache_or_load_file(path, creator, loader):
    """Caches a file if it does not exist, or loads it otherwise.

    ``creator`` creates the file to given path, and returns the content. If
    the file already exists, the ``loader`` is called instead, and it loads
    the file and returns the content.

    Note that the path passed to the creator is temporary one, and not same as
    the path given to this function. This function safely renames the file
    created by the creator to a given path, even if this function is called
    simultaneously by multiple threads or processes.

    Args:
        path (str): Path to save the cached file.
        creator: Function to create the file and returns the content. It takes
            a path to temporary place as the argument. Before calling the
            creator, there is no file at the temporary path.
        loader: Function to load the cached file and returns the content. It
            takes ``path`` as the argument.

    Returns:
        It returns the returned values by the creator or the loader.

    """
    if os.path.exists(path):
        return loader(path)

    file_name = os.path.basename(path)
    temp_dir = tempfile.mkdtemp()
    temp_path = os.path.join(temp_dir, file_name)

    lock_path = os.path.join(_dataset_root, '_create_lock')

    try:
        content = creator(temp_path)
        with filelock.FileLock(lock_path):
            # Another caller may have created the file while the creator was
            # running; in that case the existing file is left untouched.
            if not os.path.exists(path):
                shutil.move(temp_path, path)
    finally:
        shutil.rmtree(temp_dir)

    return content