
    sgJ                        d dl Z d dlZd dlZd dlZd dlmZmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZmZmZmZ g dZ ed      Z edd	      Zeeef   Zeed
f   Z edee      Z G d dee         Z G d dee   ee         Z G d deeed
f            Z G d dee         Z  G d dee         Z! G d de      Z" G d dee         Z#efdee   deee$e%f      de
e   de	e#e      fdZ&y)    N)
castDictGenericIterableListOptionalSequenceTupleTypeVarUnion)
deprecated)default_generator	GeneratorrandpermTensor)DatasetIterableDatasetTensorDatasetStackDatasetConcatDatasetChainDatasetSubsetrandom_split_T_T_coT)	covariant._T_stackc                   $    e Zd ZdZdefdZddZy)r   a  An abstract class representing a :class:`Dataset`.

    All datasets that represent a map from keys to data samples should subclass
    it. All subclasses should overwrite :meth:`__getitem__`, supporting fetching a
    data sample for a given key. Subclasses could also optionally overwrite
    :meth:`__len__`, which is expected to return the size of the dataset by many
    :class:`~torch.utils.data.Sampler` implementations and the default options
    of :class:`~torch.utils.data.DataLoader`. Subclasses could also
    optionally implement :meth:`__getitems__`, for speedup batched samples
    loading. This method accepts list of indices of samples of batch and returns
    list of samples.

    .. note::
      :class:`~torch.utils.data.DataLoader` by default constructs an index
      sampler that yields integral indices.  To make it work with a map-style
      dataset with non-integral indices/keys, a custom sampler must be provided.
    returnc                     t        d      )Nz3Subclasses of Dataset should implement __getitem__.)NotImplementedErrorselfindexs     K/var/www/html/venv/lib/python3.12/site-packages/torch/utils/data/dataset.py__getitem__zDataset.__getitem__>   s    !"WXX    c                     t        | |g      S N)r   r#   others     r%   __add__zDataset.__add__E   s    dE]++r'   N)r+   zDataset[_T_co]r   zConcatDataset[_T_co])__name__
__module____qualname____doc__r   r&   r,    r'   r%   r   r   +   s    $YE Y,r'   r   c                   "    e Zd ZdZdee   fdZy)r   aH  An iterable Dataset.

    All datasets that represent an iterable of data samples should subclass it.
    Such form of datasets is particularly useful when data come from a stream.

    All subclasses should overwrite :meth:`__iter__`, which would return an
    iterator of samples in this dataset.

    When a subclass is used with :class:`~torch.utils.data.DataLoader`, each
    item in the dataset will be yielded from the :class:`~torch.utils.data.DataLoader`
    iterator. When :attr:`num_workers > 0`, each worker process will have a
    different copy of the dataset object, so it is often desired to configure
    each copy independently to avoid having duplicate data returned from the
    workers. :func:`~torch.utils.data.get_worker_info`, when called in a worker
    process, returns information about the worker. It can be used in either the
    dataset's :meth:`__iter__` method or the :class:`~torch.utils.data.DataLoader` 's
    :attr:`worker_init_fn` option to modify each copy's behavior.

    Example 1: splitting workload across all workers in :meth:`__iter__`::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_DATALOADER)
        >>> # xdoctest: +SKIP("Fails on MacOS12")
        >>> class MyIterableDataset(torch.utils.data.IterableDataset):
        ...     def __init__(self, start, end):
        ...         super(MyIterableDataset).__init__()
        ...         assert end > start, "this example code only works with end >= start"
        ...         self.start = start
        ...         self.end = end
        ...
        ...     def __iter__(self):
        ...         worker_info = torch.utils.data.get_worker_info()
        ...         if worker_info is None:  # single-process data loading, return the full iterator
        ...             iter_start = self.start
        ...             iter_end = self.end
        ...         else:  # in a worker process
        ...             # split workload
        ...             per_worker = int(math.ceil((self.end - self.start) / float(worker_info.num_workers)))
        ...             worker_id = worker_info.id
        ...             iter_start = self.start + worker_id * per_worker
        ...             iter_end = min(iter_start + per_worker, self.end)
        ...         return iter(range(iter_start, iter_end))
        ...
        >>> # should give same set of data as range(3, 7), i.e., [3, 4, 5, 6].
        >>> ds = MyIterableDataset(start=3, end=7)

        >>> # Single-process loading
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=0)))
        [tensor([3]), tensor([4]), tensor([5]), tensor([6])]

        >>> # xdoctest: +REQUIRES(POSIX)
        >>> # Mult-process loading with two worker processes
        >>> # Worker 0 fetched [3, 4].  Worker 1 fetched [5, 6].
        >>> # xdoctest: +IGNORE_WANT("non deterministic")
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=2)))
        [tensor([3]), tensor([5]), tensor([4]), tensor([6])]

        >>> # With even more workers
        >>> # xdoctest: +IGNORE_WANT("non deterministic")
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=12)))
        [tensor([3]), tensor([5]), tensor([4]), tensor([6])]

    Example 2: splitting workload across all workers using :attr:`worker_init_fn`::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_DATALOADER)
        >>> class MyIterableDataset(torch.utils.data.IterableDataset):
        ...     def __init__(self, start, end):
        ...         super(MyIterableDataset).__init__()
        ...         assert end > start, "this example code only works with end >= start"
        ...         self.start = start
        ...         self.end = end
        ...
        ...     def __iter__(self):
        ...         return iter(range(self.start, self.end))
        ...
        >>> # should give same set of data as range(3, 7), i.e., [3, 4, 5, 6].
        >>> ds = MyIterableDataset(start=3, end=7)

        >>> # Single-process loading
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=0)))
        [3, 4, 5, 6]
        >>>
        >>> # Directly doing multi-process loading yields duplicate data
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=2)))
        [3, 3, 4, 4, 5, 5, 6, 6]

        >>> # Define a `worker_init_fn` that configures each dataset copy differently
        >>> def worker_init_fn(worker_id):
        ...     worker_info = torch.utils.data.get_worker_info()
        ...     dataset = worker_info.dataset  # the dataset copy in this worker process
        ...     overall_start = dataset.start
        ...     overall_end = dataset.end
        ...     # configure the dataset to only process the split workload
        ...     per_worker = int(math.ceil((overall_end - overall_start) / float(worker_info.num_workers)))
        ...     worker_id = worker_info.id
        ...     dataset.start = overall_start + worker_id * per_worker
        ...     dataset.end = min(dataset.start + per_worker, overall_end)
        ...

        >>> # Mult-process loading with the custom `worker_init_fn`
        >>> # Worker 0 fetched [3, 4].  Worker 1 fetched [5, 6].
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=2, worker_init_fn=worker_init_fn)))
        [3, 5, 4, 6]

        >>> # With even more workers
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=12, worker_init_fn=worker_init_fn)))
        [3, 4, 5, 6]
    r+   c                     t        | |g      S r)   )r   r*   s     r%   r,   zIterableDataset.__add__   s    T5M**r'   N)r-   r.   r/   r0   r   r   r,   r1   r'   r%   r   r   M   s    jX+WU^ +r'   r   c                   B    e Zd ZU dZeedf   ed<   deddfdZd Zd Z	y)	r   zDataset wrapping tensors.

    Each sample will be retrieved by indexing tensors along the first dimension.

    Args:
        *tensors (Tensor): tensors that have the same size of the first dimension.
    .tensorsr   Nc                 J    t        fdD              sJ d       | _        y )Nc              3   j   K   | ]*  }d    j                  d       |j                  d       k(   , yw)r   N)size).0tensorr5   s     r%   	<genexpr>z)TensorDataset.__init__.<locals>.<genexpr>   s0      
5;GAJOOA&++a.0
s   03zSize mismatch between tensors)allr5   )r#   r5   s    `r%   __init__zTensorDataset.__init__   s3     
?F
 
 	+*	+ 
 r'   c                 @    t        fd| j                  D              S )Nc              3   (   K   | ]	  }|     y wr)   r1   )r9   r:   r$   s     r%   r;   z,TensorDataset.__getitem__.<locals>.<genexpr>   s     >vVE]>   )tupler5   r"   s    `r%   r&   zTensorDataset.__getitem__   s    >>>>r'   c                 >    | j                   d   j                  d      S Nr   )r5   r8   r#   s    r%   __len__zTensorDataset.__len__   s    ||A##A&&r'   )
r-   r.   r/   r0   r
   r   __annotations__r=   r&   rE   r1   r'   r%   r   r      s5     63; D ?'r'   r   c                   ^    e Zd ZU dZeeef   ed<   dee	   dee	   ddfdZ
d Zd	efd
Zd Zy)r   a  Dataset as a stacking of multiple datasets.

    This class is useful to assemble different parts of complex input data, given as datasets.

    Example:
        >>> # xdoctest: +SKIP
        >>> images = ImageDataset()
        >>> texts = TextDataset()
        >>> tuple_stack = StackDataset(images, texts)
        >>> tuple_stack[0] == (images[0], texts[0])
        >>> dict_stack = StackDataset(image=images, text=texts)
        >>> dict_stack[0] == {'image': images[0], 'text': texts[0]}

    Args:
        *args (Dataset): Datasets for stacking returned as tuple.
        **kwargs (Dataset): Datasets for stacking returned as dict.
    datasetsargskwargsr   Nc                 V    |rG|rt        d      t        |d          _        t         fd|D              rt        d      | _        y |rSt        |j                               }t        |d          _        t         fd|D              rt        d      | _        y t        d      )NztSupported either ``tuple``- (via ``args``) or``dict``- (via ``kwargs``) like input/output, but both types are given.r   c              3   N   K   | ]  }j                   t        |      k7    y wr)   _lengthlenr9   datasetr#   s     r%   r;   z(StackDataset.__init__.<locals>.<genexpr>   s     DG4<<3w</D   "%zSize mismatch between datasetsc              3   N   K   | ]  }j                   t        |      k7    y wr)   rM   rP   s     r%   r;   z(StackDataset.__init__.<locals>.<genexpr>   s     CG4<<3w</CrR   z%At least one dataset should be passed)
ValueErrorrO   rN   anyrH   listvalues)r#   rI   rJ   tmps   `   r%   r=   zStackDataset.__init__   s     ^  tAw<DLDtDD !ABB DMv}}'Cs1v;DLCsCC !ABB"DMDEEr'   c                     t        | j                  t              r1| j                  j                         D ci c]  \  }}||    c}}S t	        fd| j                  D              S c c}}w )Nc              3   (   K   | ]	  }|     y wr)   r1   )r9   rQ   r$   s     r%   r;   z+StackDataset.__getitem__.<locals>.<genexpr>  s     AWU^Ar@   )
isinstancerH   dictitemsrA   )r#   r$   krQ   s    `  r%   r&   zStackDataset.__getitem__  sW    dmmT*8<8K8K8MN*!WAwu~%NNA4==AAA Os   A+indicesc           	         t        | j                  t              r|D cg c]  }i  }}| j                  j                         D ]  \  }}t	        t        |dd             re|j                  |      }t        |      t        |      k7  r#t        dt        |       dt        |             t        ||      D ]
  \  }}|||<    t        ||      D ]  \  }	}||	   ||<     |S |D cg c]  }g  }
}| j                  D ]  }t	        t        |dd             rq|j                  |      }t        |      t        |      k7  r#t        dt        |       dt        |             t        ||
      D ]  \  }}|j                  |        t        ||
      D ]  \  }	}|j                  ||	            |
D cg c]  }t        |       }}|S c c}w c c}w c c}w )N__getitems__z0Nested dataset's output size mismatch. Expected z, got )r[   rH   r\   r]   callablegetattrra   rO   rT   zipappendrA   )r#   r_   _
dict_batchr^   rQ   r]   datad_sampleidx
list_batcht_samplesampletuple_batchs                 r%   ra   zStackDataset.__getitems__  s   dmmT*5<(=(=J(="mm113 3
7GG^TBC#009E5zS\1()),WfSZLJ  +.eZ*@ +h&*+ *-Wj)A 3X&-cl33  /6!6"!6
!6}} 	2G>?,,W5u:W-$%%(\N&UF  '*%&< *ND(OOD)* &)*%= 2MCOOGCL12	2 DN&NuV}&N&NA )>" "7 'Os   	G)	G8Gc                     | j                   S r)   )rN   rD   s    r%   rE   zStackDataset.__len__,  s    ||r'   )r-   r.   r/   r0   r   rA   r\   rF   r   r   r=   r&   rV   ra   rE   r1   r'   r%   r   r      sV    $ E4K  Fgen F F4 F(B
#D #Jr'   r   c                        e Zd ZU dZeee      ed<   ee   ed<   e	d        Z
dee   ddf fdZd Zd	 Ze ed
e      d               Z xZS )r   zDataset as a concatenation of multiple datasets.

    This class is useful to assemble different existing datasets.

    Args:
        datasets (sequence): List of datasets to be concatenated
    rH   cumulative_sizesc                 d    g d}}| D ]&  }t        |      }|j                  ||z          ||z  }( |S rC   )rO   re   )sequencersels        r%   cumsumzConcatDataset.cumsum<  sB    11 	AAAHHQUOFA	 r'   r   Nc                    t         |           t        |      | _        t	        | j                        dkD  sJ d       | j                  D ]  }t        |t              sJ d        | j                  | j                        | _        y )Nr   z(datasets should not be an empty iterablez.ConcatDataset does not support IterableDataset)	superr=   rV   rH   rO   r[   r   rx   rq   )r#   rH   d	__class__s      r%   r=   zConcatDataset.__init__E  s    X4==!A%Q'QQ% 	@A!? @?@ 	@ !%DMM :r'   c                      | j                   d   S )Nrq   rD   s    r%   rE   zConcatDataset.__len__O  s    $$R((r'   c                     |dk  r(| t        |       kD  rt        d      t        |       |z   }t        j                  | j                  |      }|dk(  r|}n|| j                  |dz
     z
  }| j
                  |   |   S )Nr   z8absolute value of index should not exceed dataset length   )rO   rT   bisectbisect_rightrq   rH   )r#   rj   dataset_idx
sample_idxs       r%   r&   zConcatDataset.__getitem__R  s    7tc$i N  d)c/C))$*?*?E!Jt44[1_EEJ}}[)*55r'   z>`cummulative_sizes` attribute is renamed to `cumulative_sizes`)categoryc                     | j                   S r)   r   rD   s    r%   cummulative_sizeszConcatDataset.cummulative_sizes`  s     $$$r'   )r-   r.   r/   r0   r   r   r   rF   intstaticmethodrx   r   r=   rE   r&   propertyr   FutureWarningr   __classcell__r|   s   @r%   r   r   0  s~     75>""3i ;'!2 ;t ;)6 H%	 
%r'   r   c                   >     e Zd ZdZdee   ddf fdZd Zd Z xZ	S )r   a_  Dataset for chaining multiple :class:`IterableDataset` s.

    This class is useful to assemble different existing dataset streams. The
    chaining operation is done on-the-fly, so concatenating large-scale
    datasets with this class will be efficient.

    Args:
        datasets (iterable of IterableDataset): datasets to be chained together
    rH   r   Nc                 0    t         |           || _        y r)   )rz   r=   rH   )r#   rH   r|   s     r%   r=   zChainDataset.__init__t  s     r'   c              #   t   K   | j                   D ]#  }t        |t              sJ d       |E d {    % y 7 w)N*ChainDataset only supports IterableDataset)rH   r[   r   )r#   r{   s     r%   __iter__zChainDataset.__iter__x  sF      	A? <;<  LL		 s   ,868c                 v    d}| j                   D ]'  }t        |t              sJ d       |t        |      z  }) |S )Nr   r   )rH   r[   r   rO   )r#   totalr{   s      r%   rE   zChainDataset.__len__  sO     	A? <;<  SVOE		
 r'   )
r-   r.   r/   r0   r   r   r=   r   rE   r   r   s   @r%   r   r   i  s*    !'!2 !t !r'   r   c                   z    e Zd ZU dZee   ed<   ee   ed<   dee   dee   ddfdZ	d Z
dee   dee   fdZd	 Zy)
r   z
    Subset of a dataset at specified indices.

    Args:
        dataset (Dataset): The whole Dataset
        indices (sequence): Indices in the whole set selected for subset
    rQ   r_   r   Nc                      || _         || _        y r)   )rQ   r_   )r#   rQ   r_   s      r%   r=   zSubset.__init__  s    r'   c                     t        |t              r*| j                  |D cg c]  }| j                  |    c}   S | j                  | j                  |      S c c}w r)   )r[   rV   rQ   r_   )r#   rj   is      r%   r&   zSubset.__getitem__  sK    c4 <<# >Qa >??||DLL-.. !?s   Ac                    t        t        | j                  dd             r6| j                  j                  |D cg c]  }| j                  |    c}      S |D cg c]  }| j                  | j                  |        c}S c c}w c c}w )Nra   )rb   rc   rQ   ra   r_   )r#   r_   rj   s      r%   ra   zSubset.__getitems__  sn     GDLL.$?@<<,,7-SCdll3.?-STT?FGDLLc!23GG .TGs   B#Bc                 ,    t        | j                        S r)   )rO   r_   rD   s    r%   rE   zSubset.__len__  s    4<<  r'   )r-   r.   r/   r0   r   r   rF   r	   r   r=   r&   r   ra   rE   r1   r'   r%   r   r     sg     U^c] # 4 /
HDI H$u+ H!r'   r   rQ   lengths	generatorr   c           
      d   t        j                  t        |      d      rt        |      dk  rg }t        |      D ]Y  \  }}|dk  s|dkD  rt	        d| d      t        t        j                  t        |       |z              }|j                  |       [ t        |       t        |      z
  }t        |      D ]  }|t        |      z  }||xx   dz  cc<    |}t        |      D ]$  \  }}	|	dk(  st        j                  d| d       & t        |      t        |       k7  rt	        d      t        t        |      |      j                         }
t        t        t
           |      }t!        t#        j$                  |      |      D 	cg c]  \  }}	t'        | |
||	z
  |        c}	}S c c}	}w )	a  
    Randomly split a dataset into non-overlapping new datasets of given lengths.

    If a list of fractions that sum up to 1 is given,
    the lengths will be computed automatically as
    floor(frac * len(dataset)) for each fraction provided.

    After computing the lengths, if there are any remainders, 1 count will be
    distributed in round-robin fashion to the lengths
    until there are no remainders left.

    Optionally fix the generator for reproducible results, e.g.:

    Example:
        >>> # xdoctest: +SKIP
        >>> generator1 = torch.Generator().manual_seed(42)
        >>> generator2 = torch.Generator().manual_seed(42)
        >>> random_split(range(10), [3, 7], generator=generator1)
        >>> random_split(range(30), [0.3, 0.3, 0.4], generator=generator2)

    Args:
        dataset (Dataset): Dataset to be split
        lengths (sequence): lengths or fractions of splits to be produced
        generator (Generator): Generator used for the random permutation.
    r   r   zFraction at index z is not between 0 and 1zLength of split at index z- is 0. This might result in an empty dataset.zDSum of input lengths does not equal the length of the input dataset!)r   )mathisclosesum	enumeraterT   r   floorrO   re   rangewarningswarnr   tolistr   r	   rd   	itertools
accumulater   )rQ   r   r   subset_lengthsr   fracn_items_in_split	remainderidx_to_add_atlengthr_   offsets               r%   r   r     s   < ||CL!$W):$& ) 	4GAtax4!8 #5aS8O!PQQ"

3w<$./  !!"23	4 L3~#66	y! 	/AN 33M=)Q.)	/ !"7+ 	IAv{/s 3= >	 7|s7|#R
 	
 s7|y9@@BG8C='*G ")"6"6w"?IFF 	w&9:  s   F,)'r   r   r   r   typingr   r   r   r   r   r   r	   r
   r   r   typing_extensionsr   torchr   r   r   r   __all__r   r   str_T_dict_T_tupler   r   r   r   r   r   r   r   r   floatr   r1   r'   r%   <module>r      sK         ) A @	 T]4(
sEz
:x1,gen ,Dn+genhuo n+h'GE&#+./ '0T78$ Tn6%GEN 6%r? @!WU^ !H &7?R[?eCJ'(? 	"? 
&*	?r'   