
    sg=3                         d Z ddlmZmZ ddlmZ ddlmZmZm	Z	 ddl
mZmZmZmZ ddlmZmZ ddlmZmZmZmZmZ  ej0                  e      Z G d	 d
ed      ZdefdZd Z G d de      Z G d de      Z y)z
Processor class for Pixtral.
    )ListUnion   )BatchFeature)
ImageInputis_valid_image
load_image)ProcessingKwargsProcessorMixinUnpack!_validate_images_text_input_order)PreTokenizedInput	TextInput)is_torch_deviceis_torch_dtypeis_torch_tensorloggingrequires_backendsc                        e Zd Zddii ddidZy)PixtralProcessorKwargspaddingFreturn_tensorspt)text_kwargsimages_kwargscommon_kwargsN)__name__
__module____qualname__	_defaults     a/var/www/html/venv/lib/python3.12/site-packages/transformers/models/pixtral/processing_pixtral.pyr   r      s$     u
 d
Ir"   r   F)totalreturnc                 H    t        | t              xr | j                  d      S )Nhttp)
isinstancestr
startswith)vals    r#   is_urlr,   ,   s    c3:CNN6$::r"   c                 2    t        |       xs t        |       S N)r,   r   )elems    r#   is_image_or_image_urlr0   1   s    $</>$//r"   c                       e Zd ZddZy)BatchMixFeaturec                    t        | dg       ddl}i }|j                  d      }|et        |      dkD  rW|d   }t	        |      rnFt        |t              st        |      st        |t              r|}nt        dt        |       d      | j                         D ]  \  }}t        |t              r9|D 	
cg c]'  }	|	D ]   }
t        |
      s |
j                  |i |" ) c}
}	||<   Ot        ||j                        r'|j                  |      r |j                  |i |||<   t        ||j                        r||j                  |      ||<   |||<    || _        | S c c}
}	w )a  
        Send all values to device by calling `v.to(*args, **kwargs)` (PyTorch only). This should support casting in
        different `dtypes` and sending the `BatchFeature` to a different `device`.

        Args:
            args (`Tuple`):
                Will be passed to the `to(...)` function of the tensors.
            kwargs (`Dict`, *optional*):
                Will be passed to the `to(...)` function of the tensors.

        Returns:
            [`BatchFeature`]: The same instance after modification.
        torchr   Ndevicez*Attempting to cast a BatchFeature to type z. This is not supported.)r5   )r   r4   getlenr   r(   r)   r   int
ValueErroritemslistr   toTensoris_floating_pointdata)selfargskwargsr4   new_datar5   argkvsampleelements              r#   r<   zBatchMixFeature.to7   sf    	$	*H%>c$i!mq'Cc"C%)=CQTAU !#McRUhZWo!pqqJJL 	 DAq!T">?4:vDKYhipYqJGJJ/// Au||,1H1H1K"addD3F3Au||,1Cdd&d1	  	s   7E&E&N)r%   r2   )r   r   r   r<   r!   r"   r#   r2   r2   6   s    .r"   r2   c            
            e Zd ZdZddgZg dZdZdZ	 	 	 	 	 	 	 ddef fdZ		 	 	 	 dd	e
d
eeeee   ee   f   dee   defdZd Zd Zed        Z xZS )PixtralProcessora  
    Constructs a Pixtral processor which wraps a Pixtral image processor and a Pixtral tokenizer into a single processor.

    [`PixtralProcessor`] offers all the functionalities of [`CLIPImageProcessor`] and [`LlamaTokenizerFast`]. See the
    [`~PixtralProcessor.__call__`] and [`~PixtralProcessor.decode`] for more information.

    Args:
        image_processor ([`PixtralImageProcessor`], *optional*):
            The image processor is a required input.
        tokenizer ([`LlamaTokenizerFast`], *optional*):
            The tokenizer is a required input.
        patch_size (`int`, *optional*, defaults to 16):
            Patch size from the vision tower.
        chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
            in a chat into a tokenizable string.
        image_token (`str`, *optional*, defaults to `"[IMG]"`):
            Special token used to denote image location.
        image_break_token (`str`, *optional*, defaults to `"[IMG_BREAK]"`):
            Special token used to denote the end of a line of pixels in an image.
        image_end_token (`str`, *optional*, defaults to `"[IMG_END]"`):
            Special token used to denote the end of an image input.
    image_processor	tokenizer)chat_template
patch_sizeimage_tokenimage_break_tokenimage_end_tokenAutoImageProcessorAutoTokenizerrN   c                 b    || _         || _        || _        || _        t        	|   |||       y )N)rM   )rN   rO   rP   rQ   super__init__)
r@   rK   rL   rN   rM   rO   rP   rQ   rB   	__class__s
            r#   rV   zPixtralProcessor.__init__   s8     %&!2.)=Qr"   imagestextrB   r%   c           
         t        ||      \  }} | j                  t        fd| j                  j                  i|}|t        |      r|gg}nt        |t              r3t        |d         r%t        |t              r|D cg c]  }|g }}nD|g}n@t        |t              r%t        |d   t              rt        |d   d         rnt        d      |D cg c]  }|D cg c]  }t        |       c} }}} | j                  |fd| j                  i|d   }	ni }	t        |t              r|g}n.t        |t              st        |d   t              st        d      |}
|	j                  d      /|	d   }|	j                  d      }g }
t        |||      D ]  \  }}}g }t        ||      D ]  \  }}|\  }}|| j                  z  }|| j                  z  }| j                   g|z  | j"                  gz   g|z  }|D cg c]  }|D ]  }|  }}}| j$                  |d	<   d
j'                  |      }|j)                  |       |j+                  | j                   dd      } d|v r)|j                  d      }|j+                  d|d      }d|v r)|
j)                  |       	  | j                  |
fi |d   }t-        i ||	      S c c}w c c}w c c}}w c c}}w )a  
        Main method to prepare for the model one or several sequences(s) and image(s). This method forwards the `text`
        and `kwargs` arguments to LlamaTokenizerFast's [`~LlamaTokenizerFast.__call__`] if `text` is not `None` to encode
        the text. To prepare the image(s), this method forwards the `images` and `kwrags` arguments to
        CLIPImageProcessor's [`~CLIPImageProcessor.__call__`] if `images` is not `None`. Please refer to the doctsring
        of the above two methods for more information.

        Args:
            images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
                tensor. Both channels-first and channels-last formats are supported.
            text (`str`, `List[str]`, `List[List[str]]`):
                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
            return_tensors (`str` or [`~utils.TensorType`], *optional*):
                If set, will return tensors of a particular framework. Acceptable values are:

                - `'tf'`: Return TensorFlow `tf.constant` objects.
                - `'pt'`: Return PyTorch `torch.Tensor` objects.
                - `'np'`: Return NumPy `np.ndarray` objects.
                - `'jax'`: Return JAX `jnp.ndarray` objects.

        Returns:
            [`BatchFeature`]: A [`BatchFeature`] with the following fields:

            - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`.
            - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
              `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
            `None`).
            - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
        tokenizer_init_kwargsr   zdInvalid input images. Please provide a single image, a list of images, or a list of lists of images.rN   r   zAInvalid input text. Please provide a string, or a list of stringspixel_valuesimage_sizes z<placeholder>   r   )r?   )r   _merge_kwargsr   rL   init_kwargsr0   r(   r;   r9   r	   rK   rN   r)   r6   popziprO   rP   rQ   joinappendreplacer2   )r@   rX   rY   audiovideosrB   output_kwargsimrG   image_inputsprompt_stringsr]   sample_imagessample_image_sizesreplace_stringsimage
image_sizeheightwidthnum_height_tokensnum_width_tokensreplace_tokenssublistitemreplace_strtext_inputss                             r#   __call__zPixtralProcessor.__call__   s$   R 9F***"
"&.."<"<
 
 $V,!(FD).CF1I.NdD)-34rrd4F4$XFFD)jD.INcdjkldmnodpNq z  GMMF7"z"~7MFM/4//u4??uVcdsVtuLLdC 6DD$'
47C0H`aa N+7!.1F&**=9KN=@VZ=[ .916"$),]<N)O R%E:$.MFE(.$//(A%','?$))*-==AWAW@XX&)&*N ;I%]wU\%]Td%]d%]N%])-)=)=N2&"$''."9K#**;7#^^D,<,<oqQFR &/"1"5"5a"8K#^^O[!LF &/ %%f-)., %dnn^T}]7ST$C{$Cl$CDDe 5 8M: &^s$   
K
	K K2K K
Kc                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )rL   batch_decoder@   rA   rB   s      r#   r~   zPixtralProcessor.batch_decode  s     
 +t~~**D;F;;r"   c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        )rL   decoder   s      r#   r   zPixtralProcessor.decode  s     
 %t~~$$d5f55r"   c                     | j                   j                  }| j                  j                  }t        t        j                  ||z               S r.   )rL   model_input_namesrK   r;   dictfromkeys)r@   tokenizer_input_namesimage_processor_input_namess      r#   r   z"PixtralProcessor.model_input_names  sA     !% @ @&*&:&:&L&L#DMM"7:U"UVWWr"   )NN   Nz[IMG]z[IMG_BREAK]z	[IMG_END])NNNN)r   r   r   __doc__
attributesvalid_kwargsimage_processor_classtokenizer_classr8   rV   r   r   r   r   r   r   r   r2   r|   r~   r   propertyr   __classcell__)rW   s   @r#   rJ   rJ   h   s    . $[1JL 1%O '#R 	R& "^bhEhE I0$y/4HYCZZ[hE /0hE 
hEV<6 X Xr"   rJ   N)!r   typingr   r   feature_extraction_utilsr   image_utilsr   r   r	   processing_utilsr
   r   r   r   tokenization_utils_baser   r   utilsr   r   r   r   r   
get_loggerr   loggerr   boolr,   r0   r2   rJ   r!   r"   r#   <module>r      sx     4 A A k k C a a 
		H	%	-U 	;4 ;
0
/l /dsX~ sXr"   