[pytorch修改]dataloader.py 实现darknet中的subdivision功能

dataloader.py
import random

import torch

import torch.multiprocessing as multiprocessing

from torch._C import _set_worker_signal_handlers, _update_worker_pids, \

    _remove_worker_pids, _error_if_any_worker_fails

from . import SequentialSampler, RandomSampler, BatchSampler

import signal

import functools

import collections

import re

import sys

import threading

import traceback

import os

import time

from torch._six import string_classes, int_classes, FileNotFoundError

IS_WINDOWS = sys.platform == "win32"

if IS_WINDOWS:

    import ctypes

    from ctypes.wintypes import DWORD, BOOL, HANDLE

if sys.version_info[0] == 2:

    import Queue as queue

else:

    import queue

class ExceptionWrapper(object):

    r"""Wraps an exception plus traceback to communicate across threads"""

    def __init__(self, exc_info):

        self.exc_type = exc_info[0]

        self.exc_msg = "".join(traceback.format_exception(*exc_info))

_use_shared_memory = False

r"""Whether to use shared memory in default_collate"""

MANAGER_STATUS_CHECK_INTERVAL = 5.0

if IS_WINDOWS:

    # On Windows, the parent ID of the worker process remains unchanged when the manager process

    # is gone, and the only way to check it through OS is to let the worker have a process handle

    # of the manager and ask if the process status has changed.

    class ManagerWatchdog(object):

        def __init__(self):

            self.manager_pid = os.getppid()

            self.kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)

            self.kernel32.OpenProcess.argtypes = (DWORD, BOOL, DWORD)

            self.kernel32.OpenProcess.restype = HANDLE

            self.kernel32.WaitForSingleObject.argtypes = (HANDLE, DWORD)

            self.kernel32.WaitForSingleObject.restype = DWORD

            # Value obtained from https://msdn.microsoft.com/en-us/library/ms684880.aspx

            SYNCHRONIZE = 0x00100000

            self.manager_handle = self.kernel32.OpenProcess(SYNCHRONIZE, 0, self.manager_pid)

            if not self.manager_handle:

                raise ctypes.WinError(ctypes.get_last_error())

        def is_alive(self):

            # Value obtained from https://msdn.microsoft.com/en-us/library/windows/desktop/ms687032.aspx

            return self.kernel32.WaitForSingleObject(self.manager_handle, 0) != 0

else:

    class ManagerWatchdog(object):

        def __init__(self):

            self.manager_pid = os.getppid()

        def is_alive(self):

            return os.getppid() == self.manager_pid

def _worker_loop(dataset, index_queue, data_queue, collate_fn, seed, init_fn, worker_id):

    global _use_shared_memory

    _use_shared_memory = True

    # Intialize C side signal handlers for SIGBUS and SIGSEGV. Python signal

    # module's handlers are executed after Python returns from C low-level

    # handlers, likely when the same fatal signal happened again already.

    # https://docs.python.org/3/library/signal.html Sec. 18.8.1.1

    _set_worker_signal_handlers()

    torch.set_num_threads(1)

    random.seed(seed)

    torch.manual_seed(seed)

    if init_fn is not None:

        init_fn(worker_id)

    watchdog = ManagerWatchdog()

    while True:

        try:

            r = index_queue.get(timeout=MANAGER_STATUS_CHECK_INTERVAL)

        except queue.Empty:

            if watchdog.is_alive():

                continue

            else:

                break

        if r is None:

            break

        idx, batch_indices = r

        try:

            samples = collate_fn([dataset[i] for i in batch_indices])

        except Exception:

            data_queue.put((idx, ExceptionWrapper(sys.exc_info())))

        else:

            data_queue.put((idx, samples))

            del samples

def _worker_manager_loop(in_queue, out_queue, done_event, pin_memory, device_id):

    if pin_memory:

        torch.cuda.set_device(device_id)

    while True:

        try:

            r = in_queue.get()

        except Exception:

            if done_event.is_set():

                return

            raise

        if r is None:

            break

        if isinstance(r[1], ExceptionWrapper):

            out_queue.put(r)

            continue

        idx, batch = r

        try:

            if pin_memory:

                batch = pin_memory_batch(batch)

        except Exception:

            out_queue.put((idx, ExceptionWrapper(sys.exc_info())))

        else:

            out_queue.put((idx, batch))

numpy_type_map = {

    'float64': torch.DoubleTensor,

    'float32': torch.FloatTensor,

    'float16': torch.HalfTensor,

    'int64': torch.LongTensor,

    'int32': torch.IntTensor,

    'int16': torch.ShortTensor,

    'int8': torch.CharTensor,

    'uint8': torch.ByteTensor,

}

def default_collate(batch):

    r"""Puts each data field into a tensor with outer dimension batch size"""

    error_msg = "batch must contain tensors, numbers, dicts or lists; found {}"

    elem_type = type(batch[0])

    if isinstance(batch[0], torch.Tensor):

        out = None

        if _use_shared_memory:

            # If we're in a background process, concatenate directly into a

            # shared memory tensor to avoid an extra copy

            numel = sum([x.numel() for x in batch])

            storage = batch[0].storage()._new_shared(numel)

            out = batch[0].new(storage)

        return torch.stack(batch, 0, out=out)

    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \

            and elem_type.__name__ != 'string_':

        elem = batch[0]

        if elem_type.__name__ == 'ndarray':

            # array of string classes and object

            if re.search('[SaUO]', elem.dtype.str) is not None:

                raise TypeError(error_msg.format(elem.dtype))

            return torch.stack([torch.from_numpy(b) for b in batch], 0)

        if elem.shape == ():  # scalars

            py_type = float if elem.dtype.name.startswith('float') else int

            return numpy_type_map[elem.dtype.name](list(map(py_type, batch)))

    elif isinstance(batch[0], int_classes):

        return torch.LongTensor(batch)

    elif isinstance(batch[0], float):

        return torch.DoubleTensor(batch)

    elif isinstance(batch[0], string_classes):

        return batch

    elif isinstance(batch[0], collections.Mapping):

        return {key: default_collate([d[key] for d in batch]) for key in batch[0]}

    elif isinstance(batch[0], collections.Sequence):

        transposed = zip(*batch)

        return [default_collate(samples) for samples in transposed]

    raise TypeError((error_msg.format(type(batch[0]))))

def pin_memory_batch(batch):

    if isinstance(batch, torch.Tensor):

        return batch.pin_memory()

    elif isinstance(batch, string_classes):

        return batch

    elif isinstance(batch, collections.Mapping):

        return {k: pin_memory_batch(sample) for k, sample in batch.items()}

    elif isinstance(batch, collections.Sequence):

        return [pin_memory_batch(sample) for sample in batch]

    else:

        return batch

_SIGCHLD_handler_set = False

r"""Whether SIGCHLD handler is set for DataLoader worker failures. Only one

handler needs to be set for all DataLoaders in a process."""

def _set_SIGCHLD_handler():

    # Windows doesn't support SIGCHLD handler

    if sys.platform == 'win32':

        return

    # can't set signal in child threads

    if not isinstance(threading.current_thread(), threading._MainThread):

        return

    global _SIGCHLD_handler_set

    if _SIGCHLD_handler_set:

        return

    previous_handler = signal.getsignal(signal.SIGCHLD)

    if not callable(previous_handler):

        previous_handler = None

    def handler(signum, frame):

        # This following call uses `waitid` with WNOHANG from C side. Therefore,

        # Python can still get and update the process status successfully.

        _error_if_any_worker_fails()

        if previous_handler is not None:

            previous_handler(signum, frame)

    signal.signal(signal.SIGCHLD, handler)

    _SIGCHLD_handler_set = True

class _DataLoaderIter(object):

    r"""Iterates once over the DataLoader's dataset, as specified by the sampler"""

    def __init__(self, loader):

        self.dataset = loader.dataset

        self.collate_fn = loader.collate_fn

        self.batch_sampler = loader.batch_sampler

        self.num_workers = loader.num_workers

        self.pin_memory = loader.pin_memory and torch.cuda.is_available()

        self.timeout = loader.timeout

        self.done_event = threading.Event()

        self.sample_iter = iter(self.batch_sampler)

        base_seed = torch.LongTensor(1).random_().item()

        if self.num_workers > 0:

            self.worker_init_fn = loader.worker_init_fn

            self.index_queues = [multiprocessing.Queue() for _ in range(self.num_workers)]

            self.worker_queue_idx = 0

            self.worker_result_queue = multiprocessing.SimpleQueue()

            self.batches_outstanding = 0

            self.worker_pids_set = False

            self.shutdown = False

            self.send_idx = 0

            self.rcvd_idx = 0

            self.reorder_dict = {}

            self.workers = [

                multiprocessing.Process(

                    target=_worker_loop,

                    args=(self.dataset, self.index_queues[i],

                          self.worker_result_queue, self.collate_fn, base_seed + i,

                          self.worker_init_fn, i))

                for i in range(self.num_workers)]

            if self.pin_memory or self.timeout > 0:

                self.data_queue = queue.Queue()

                if self.pin_memory:

                    maybe_device_id = torch.cuda.current_device()

                else:

                    # do not initialize cuda context if not necessary

                    maybe_device_id = None

                self.worker_manager_thread = threading.Thread(

                    target=_worker_manager_loop,

                    args=(self.worker_result_queue, self.data_queue, self.done_event, self.pin_memory,

                          maybe_device_id))

                self.worker_manager_thread.daemon = True

                self.worker_manager_thread.start()

            else:

                self.data_queue = self.worker_result_queue

            for w in self.workers:

                w.daemon = True  # ensure that the worker exits on process exit

                w.start()

            _update_worker_pids(id(self), tuple(w.pid for w in self.workers))

            _set_SIGCHLD_handler()

            self.worker_pids_set = True

            # prime the prefetch loop

            for _ in range(2 * self.num_workers):

                self._put_indices()

    def __len__(self):

        return len(self.batch_sampler)

    def _get_batch(self):

        if self.timeout > 0:

            try:

                return self.data_queue.get(timeout=self.timeout)

            except queue.Empty:

                raise RuntimeError('DataLoader timed out after {} seconds'.format(self.timeout))

        else:

            return self.data_queue.get()

    def __next__(self):

        if self.num_workers == 0:  # same-process loading

            indices = next(self.sample_iter)  # may raise StopIteration

            batch = self.collate_fn([self.dataset[i] for i in indices])

            if self.pin_memory:

                batch = pin_memory_batch(batch)

            return batch

        # check if the next sample has already been generated

        if self.rcvd_idx in self.reorder_dict:

            batch = self.reorder_dict.pop(self.rcvd_idx)

            return self._process_next_batch(batch)

        if self.batches_outstanding == 0:

            self._shutdown_workers()

            raise StopIteration

        while True:

            assert (not self.shutdown and self.batches_outstanding > 0)

            idx, batch = self._get_batch()

            self.batches_outstanding -= 1

            if idx != self.rcvd_idx:

                # store out-of-order samples

                self.reorder_dict[idx] = batch

                continue

            return self._process_next_batch(batch)

    next = __next__  # Python 2 compatibility

    def __iter__(self):

        return self

    def _put_indices(self):

        assert self.batches_outstanding < 2 * self.num_workers

        indices = next(self.sample_iter, None)

        if indices is None:

            return

        self.index_queues[self.worker_queue_idx].put((self.send_idx, indices))

        self.worker_queue_idx = (self.worker_queue_idx + 1) % self.num_workers

        self.batches_outstanding += 1

        self.send_idx += 1

    def _process_next_batch(self, batch):

        self.rcvd_idx += 1

        self._put_indices()

        if isinstance(batch, ExceptionWrapper):

            raise batch.exc_type(batch.exc_msg)

        return batch

    def __getstate__(self):

        # TODO: add limited pickling support for sharing an iterator

        # across multiple threads for HOGWILD.

        # Probably the best way to do this is by moving the sample pushing

        # to a separate thread and then just sharing the data queue

        # but signalling the end is tricky without a non-blocking API

        raise NotImplementedError("_DataLoaderIter cannot be pickled")

    def _shutdown_workers(self):

        try:

            if not self.shutdown:

                self.shutdown = True

                self.done_event.set()

                for q in self.index_queues:

                    q.put(None)

                # if some workers are waiting to put, make place for them

                try:

                    while not self.worker_result_queue.empty():

                        self.worker_result_queue.get()

                except (FileNotFoundError, ImportError):

                    # Many weird errors can happen here due to Python

                    # shutting down. These are more like obscure Python bugs.

                    # FileNotFoundError can happen when we rebuild the fd

                    # fetched from the queue but the socket is already closed

                    # from the worker side.

                    # ImportError can happen when the unpickler loads the

                    # resource from `get`.

                    pass

                # done_event should be sufficient to exit worker_manager_thread,

                # but be safe here and put another None

                self.worker_result_queue.put(None)

        finally:

            # removes pids no matter what

            if self.worker_pids_set:

                _remove_worker_pids(id(self))

                self.worker_pids_set = False

    def __del__(self):

        if self.num_workers > 0:

            self._shutdown_workers()

class DataLoader(object):

    r"""

    Data loader. Combines a dataset and a sampler, and provides

    single- or multi-process iterators over the dataset.

    Arguments:

        dataset (Dataset): dataset from which to load the data.

        batch_size (int, optional): how many samples per batch to load

            (default: 1).

        shuffle (bool, optional): set to ``True`` to have the data reshuffled

            at every epoch (default: False).

        sampler (Sampler, optional): defines the strategy to draw samples from

            the dataset. If specified, ``shuffle`` must be False.

        batch_sampler (Sampler, optional): like sampler, but returns a batch of

            indices at a time. Mutually exclusive with batch_size, shuffle,

            sampler, and drop_last.

        num_workers (int, optional): how many subprocesses to use for data

            loading. 0 means that the data will be loaded in the main process.

            (default: 0)

        collate_fn (callable, optional): merges a list of samples to form a mini-batch.

        pin_memory (bool, optional): If ``True``, the data loader will copy tensors

            into CUDA pinned memory before returning them.

        drop_last (bool, optional): set to ``True`` to drop the last incomplete batch,

            if the dataset size is not divisible by the batch size. If ``False`` and

            the size of dataset is not divisible by the batch size, then the last batch

            will be smaller. (default: False)

        timeout (numeric, optional): if positive, the timeout value for collecting a batch

            from workers. Should always be non-negative. (default: 0)

        worker_init_fn (callable, optional): If not None, this will be called on each

            worker subprocess with the worker id (an int in ``[0, num_workers - 1]``) as

            input, after seeding and before data loading. (default: None)

    .. note:: By default, each worker will have its PyTorch seed set to

              ``base_seed + worker_id``, where ``base_seed`` is a long generated

              by main process using its RNG. However, seeds for other libraies

              may be duplicated upon initializing workers (w.g., NumPy), causing

              each worker to return identical random numbers. (See

              :ref:`dataloader-workers-random-seed` section in FAQ.) You may

              use ``torch.initial_seed()`` to access the PyTorch seed for each

              worker in :attr:`worker_init_fn`, and use it to set other seeds

              before data loading.

    .. warning:: If ``spawn`` start method is used, :attr:`worker_init_fn` cannot be an

                 unpicklable object, e.g., a lambda function.

    """

    __initialized = False

    def __init__(self, dataset, batch_size=1, subdivison=1, shuffle=False, sampler=None, batch_sampler=None,

                 num_workers=0, collate_fn=default_collate, pin_memory=False, drop_last=False,

                 timeout=0, worker_init_fn=None):

        self.dataset = dataset

        self.batch_size_real = batch_size

        self.subdivision = subdivison

        self.batch_size = batch_size/subdivison

        self.num_workers = num_workers

        self.collate_fn = collate_fn

        self.pin_memory = pin_memory

        self.drop_last = drop_last

        self.timeout = timeout

        self.worker_init_fn = worker_init_fn

        if timeout < 0:

            raise ValueError('timeout option should be non-negative')

        if batch_sampler is not None:

            if batch_size > 1 or shuffle or sampler is not None or drop_last:

                raise ValueError('batch_sampler option is mutually exclusive '

                                 'with batch_size, shuffle, sampler, and '

                                 'drop_last')

            self.batch_size = None

            self.drop_last = None

        if sampler is not None and shuffle:

            raise ValueError('sampler option is mutually exclusive with '

                             'shuffle')

        if self.num_workers < 0:

            raise ValueError('num_workers option cannot be negative; '

                             'use num_workers=0 to disable multiprocessing.')

        if batch_sampler is None:

            if sampler is None:

                if shuffle:

                    sampler = RandomSampler(dataset)

                else:

                    sampler = SequentialSampler(dataset)

            batch_sampler = BatchSampler(sampler, batch_size, drop_last)

        self.sampler = sampler

        self.batch_sampler = batch_sampler

        self.__initialized = True

    def __setattr__(self, attr, val):

        if self.__initialized and attr in ('batch_size', 'sampler', 'drop_last'):

            raise ValueError('{} attribute should not be set after {} is '

                             'initialized'.format(attr, self.__class__.__name__))

        super(DataLoader, self).__setattr__(attr, val)

    def __iter__(self):

        return _DataLoaderIter(self)

    def __len__(self):

        return len(self.batch_sampler)
秒客网

[pytorch修改]dataloader.py 实现darknet中的subdivision功能

相关文章