
    sg{1                        d Z ddlmZ ddlmZmZmZmZ ddlm	Z	 ddl
mZmZmZ ddlmZmZmZmZmZ ddlmZmZ dd	lmZ erdd
lmZ  ej4                  e      ZdefdZd Z G d ded      Z  G d ded      Z! G d de      Z"y)z
Processor class for IDEFICS2.
    )
accumulate)TYPE_CHECKINGListOptionalUnion   )BatchFeature)
ImageInputis_valid_image
load_image)ImagesKwargsProcessingKwargsProcessorMixinUnpack!_validate_images_text_input_order)
AddedToken	TextInput)logging)PreTokenizedInputreturnc                 H    t        | t              xr | j                  d      S )Nhttp)
isinstancestr
startswith)vals    c/var/www/html/venv/lib/python3.12/site-packages/transformers/models/idefics2/processing_idefics2.pyis_urlr   *   s    c3:CNN6$::    c                 2    t        |       xs t        |       S N)r   r   )elems    r   is_image_or_image_urlr#   .   s    $</>$//r   c                       e Zd ZU ee   ed<   y)Idefics2ImagesKwargsimage_seq_lenN)__name__
__module____qualname__r   int__annotations__ r   r   r%   r%   2   s    C= r   r%   F)totalc                   *    e Zd ZU eed<   ddddi dZy)Idefics2ProcessorKwargsimages_kwargsTF)add_special_tokenspaddingis_split_into_words)text_kwargsr0   N)r'   r(   r)   r%   r+   	_defaultsr,   r   r   r/   r/   6   s$    '' #'#(

 Ir   r/   c            
            e Zd ZdZddgZddgZdZdZddede	f fdZ
d	 Z	 	 	 	 dd
eeee   eee      f   deedee   ed   f   dee   defdZd Zd Zed        Z xZS )Idefics2Processora  
    Constructs a IDEFICS2 processor which wraps a LLama tokenizer and IDEFICS2 image processor into a single processor.

    [`IdeficsProcessor`] offers all the functionalities of [`Idefics2ImageProcessor`] and [`LlamaTokenizerFast`]. See
    the docstring of [`~IdeficsProcessor.__call__`] and [`~IdeficsProcessor.decode`] for more information.

    Args:
        image_processor (`Idefics2ImageProcessor`):
            An instance of [`Idefics2ImageProcessor`]. The image processor is a required input.
        tokenizer (`PreTrainedTokenizerBase`, *optional*):
            An instance of [`PreTrainedTokenizerBase`]. This should correspond with the model's text model. The tokenizer is a required input.
        image_seq_len (`int`, *optional*, defaults to 64):
            The length of the image sequence i.e. the number of <image> tokens per image in the input.
            This parameter is used to build the string from the input prompt and image tokens and should match the
            config.perceiver_config.resampler_n_latents value for the model used.
        chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
            in a chat into a tokenizable string.
    image_processor	tokenizerr&   chat_templateIdefics2ImageProcessorAutoTokenizerc                    |t        d      |t        d      t        |d      sRt        ddd      | _        t        ddd      | _        d	| j                  | j                  gi}|j                  |       n"|j                  | _        |j                  | _        t        d
dd      | _        |j                  d	| j                  gi       || _        t        | )  |||       y )Nz)You need to specify an `image_processor`.z"You need to specify a `tokenizer`.image_tokenz<fake_token_around_image>FT)
normalizedspecialz<image>additional_special_tokensz<end_of_utterance>)r:   )
ValueErrorhasattrr   fake_image_tokenr>   r1   image_boundary_tokenend_of_utterance_tokenr&   super__init__)selfr8   r9   r&   r:   kwargstokens_to_add	__class__s          r   rH   zIdefics2Processor.__init__\   s    "HIIABBy-0$./JW\fj$kD!))tTD84;P;PRVRbRb:cdM((7$-$B$BD!(44D&01ERWae&f#$$&ADD_D_C`%ab*)=Qr   c                     g }|D ]_  }g }|D ]E  }t        |      r|j                  |        t        |      s,|j                  t        |             G |j                  |       a |S r!   )r   appendr   r   )rI   promptsprompt_imagespromptimagesr"   s         r   _extract_images_from_promptsz.Idefics2Processor._extract_images_from_promptsq   sn     	)FF 4!$'MM$'D\MM*T"23	4
   (	) r   rR   textr   rJ   r   c                 d   ||t        d      t        ||      \  }} | j                  t        fd| j                  j
                  i|}|d   j                  dd      }||n| j                  }g }t               }	|t        |t              r|g}n.t        |t              st        |d   t              st        d      | j                  j                  }
| j                  j                  }|
 ||z   |
 }| j                  j                   r|dz  }g }|D ]\  }|j#                  |j%                  |             |j'                  ||      }|j'                  |
 |
 |
       }|j#                  |       ^  | j                  |fi |d	   }|	j)                  |       |zt+        |      r|gg}nt        |t              rt+        |d         r|t-        |      t/        |      k7  r*t        d
 dt-        |       d| dt/        |       d	      dgt        t1        |            z   }t3        t/        |            D cg c]  }|||   ||dz        }}nC|g}n?t        |t              s/t        |d   t              st+        |d   d         st        d      |D cg c]  }t/        |       }}|||k(  st        d| d| d      |D cg c]  }|D cg c]  }t5        |       c} }}} | j                  |fi |d   }|	j)                  |       |	S c c}w c c}w c c}w c c}}w )a
  
        Processes the input prompts and returns a BatchEncoding.

        Example:

        ```python
        >>> import requests
        >>> from transformers import Idefics2Processor
        >>> from transformers.image_utils import load_image

        >>> processor = Idefics2Processor.from_pretrained("HuggingFaceM4/idefics2-8b", image_seq_len=2)
        >>> processor.image_processor.do_image_splitting = False  # Force as False to simplify the example

        >>> url1 = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
        >>> url2 = "https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg"

        >>> image1, image2 = load_image(url1), load_image(url2)
        >>> images = [[image1], [image2]]

        >>> text = [
        ...     "<image>In this image, we see",
        ...     "bla bla bla<image>",
        ... ]
        >>> outputs = processor(images=images, text=text, return_tensors="pt", padding=True)
        >>> input_ids = outputs.input_ids
        >>> input_tokens = processor.tokenizer.batch_decode(input_ids)
        >>> print(input_tokens)
        ['<s><fake_token_around_image><image><image><fake_token_around_image> In this image, we see', '<s> bla bla bla<fake_token_around_image><image><image><fake_token_around_image>']
        ```

        Args:
            images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`, *optional*):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
                tensor. If is of type `List[ImageInput]`, it's assumed that this is for a single prompt i.e. of batch size 1.
            text (`Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]]`, *optional*):
                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).

                Wherever an image token, `<image>` is encountered it is expanded to
                `<fake_token_around_image>` + `<image>` * `image_seq_len` * <fake_token_around_image>`.
            return_tensors (`Union[str, TensorType]`, *optional*):
                If set, will return tensors of a particular framework. See [`PreTrainedTokenizerFast.__call__`] for more
                information.

        Nz+You must provide either `text` or `images`.tokenizer_init_kwargsr0   r&   r   zAInvalid input text. Please provide a string, or a list of strings   r4   zThe total number of zP tokens in the prompts should be the same as the number of images passed. Found  z tokens and z images.   zdInvalid input images. Please provide a single image or a list of images or a list of list of images.z!The number of images in the text z and images  z should be the same.)rB   r   _merge_kwargsr/   r9   init_kwargspopr&   r	   r   r   listrD   contentr>   r8   do_image_splittingrN   countreplaceupdater#   sumlenr   ranger   )rI   rR   rT   audiovideosrJ   output_kwargsr&   n_images_in_textinputsrD   r>   	image_strprompt_stringssampletext_inputscumsum_images_in_textin_images_in_imagesimimage_inputss                        r   __call__zIdefics2Processor.__call__}   s   l <FNJKK8F***#
"&.."<"<
 

 &o6::?DQ)6)BHZHZ$$vd+JtAw4L !dee  $44<<**22K+,[=-H,IJZI[\I##66%M	N . ''[(ABY?+;*<=M<N(OTdSeg%%f-. )$..X=;WXKMM+&$V,!(FD).CF1I.N#+,F;(2;- @&&)*:&;%<Ak],WZ[aWbVcckm 
 ./C$zBR7S2T,T) "'s+;'<!= 4Q7:OPQTUPU:VWF 
 %XF vt,"6!9d3-fQil; z  =C!C&#f+!C!C(:>N(N 78H7IWiVjj~ 
 GMMF7"z"~7MFM/4//Y-:XYLMM,'7  "D 8Ms$    LL"	L,L',L,'L,c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )r9   batch_decoderI   argsrJ   s      r   rv   zIdefics2Processor.batch_decode  s     
 +t~~**D;F;;r   c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        )r9   decoderw   s      r   rz   zIdefics2Processor.decode  s     
 %t~~$$d5f55r   c                     | j                   j                  }| j                  j                  }t        t        j                  ||z               S r!   )r9   model_input_namesr8   r]   dictfromkeys)rI   tokenizer_input_namesimage_processor_input_namess      r   r|   z#Idefics2Processor.model_input_names  s?     $ @ @&*&:&:&L&L#DMM"7:U"UVWWr   )N@   N)NNNN)r'   r(   r)   __doc__
attributesvalid_kwargsimage_processor_classtokenizer_classr*   r   rH   rS   r   r
   r   r   r   r/   r	   rt   rv   rz   propertyr|   __classcell__)rL   s   @r   r7   r7   C   s    & $[1J#_5L4%ORs R`c R*
 OSbfGj$z"2Dj9I4JJKG I2DOTJ]E^^_G 01G 
GR<6 X Xr   r7   N)#r   	itertoolsr   typingr   r   r   r   feature_extraction_utilsr	   image_utilsr
   r   r   processing_utilsr   r   r   r   r   tokenization_utils_baser   r   utilsr   r   
get_loggerr'   loggerboolr   r#   r%   r/   r7   r,   r   r   <module>r      s    ! 7 7 4 A A  =  < 
		H	%;4 ;0!<u !
.e 
UX UXr   