
    sg(                         d Z ddlZddlmZmZ ddlmZ ddlmZ ddl	m
Z
mZmZ ddlmZmZmZmZ dd	lmZ d
dlmZ  ej,                  e      Z G d de
d      Z G d de      Zy)zq
Processor class for InstructBLIP. Largely copy of Blip2Processor with addition of a tokenizer for the Q-Former.
    N)ListUnion   )BatchFeature)
ImageInput)ProcessingKwargsProcessorMixinUnpack)
AddedTokenBatchEncodingPreTokenizedInput	TextInput)logging   )AutoTokenizerc            
       *    e Zd Zdddddddddd	i dZy)InstructBlipProcessorKwargsTFr   )	add_special_tokenspaddingstridereturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_token_type_idsreturn_lengthverbose)text_kwargsimages_kwargsN)__name__
__module____qualname__	_defaults     k/var/www/html/venv/lib/python3.12/site-packages/transformers/models/instructblip/processing_instructblip.pyr   r   &   s0     #').*/&+%*"

 Ir$   r   F)totalc            
            e Zd ZdZg dZdgZdZdZdZd fd	Z		 	 	 	 dde
deeeee   ee   f   d	ee   d
efdZd Zd Zed        Z fdZe fd       Z xZS )InstructBlipProcessora  
    Constructs an InstructBLIP processor which wraps a BLIP image processor and a LLaMa/T5 tokenizer into a single
    processor.

    [`InstructBlipProcessor`] offers all the functionalities of [`BlipImageProcessor`] and [`AutoTokenizer`]. See the
    docstring of [`~BlipProcessor.__call__`] and [`~BlipProcessor.decode`] for more information.

    Args:
        image_processor (`BlipImageProcessor`):
            An instance of [`BlipImageProcessor`]. The image processor is a required input.
        tokenizer (`AutoTokenizer`):
            An instance of ['PreTrainedTokenizer`]. The tokenizer is a required input.
        qformer_tokenizer (`AutoTokenizer`):
            An instance of ['PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
        num_query_tokens (`int`, *optional*):"
            Number of tokens used by the Qformer as queries, should be same as in model's config.
    )image_processor	tokenizerqformer_tokenizernum_query_tokensBlipImageProcessorr   c                     t        |d      s2t        ddd      | _        |j                  | j                  gd       n|j                  | _        || _        t
        |   |||       y )Nimage_tokenz<image>FT)
normalizedspecial)special_tokens)hasattrr   r/   
add_tokensr,   super__init__)selfr)   r*   r+   r,   kwargs	__class__s         r%   r6   zInstructBlipProcessor.__init__P   sb    y-0))tTD  $"2"2!3D I(44D 0)5FGr$   imagestextr8   returnc                    ||t        d       | j                  t        fd| j                  j                  i|}t               }|ut        |t              r|g}n.t        |t              st        |d   t              st        d      |d   j                  dd      } | j                  |fi |d   ddi}	||d   d<   | j                  {|yi }
| j                  j                  | j                  z  }| j                  |gt        |      z  dd	      }|	D ]-  }t        ||   |	|         D cg c]
  \  }}||z    c}}|
|<   / n|	}
|t        j!                  d
       t#        |
|      }
|j%                  |
        | j&                  |fi |d   }|j                  d      |d<   |j                  d      |d<   |' | j(                  |fi |d   }|j%                  |       |S c c}}w )a  
        This method uses [`BlipImageProcessor.__call__`] method to prepare image(s) for the model, and
        [`BertTokenizerFast.__call__`] to prepare text for the model.

        Please refer to the docstring of the above two methods for more information.
        Args:
            images (`ImageInput`):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
                tensor. Both channels-first and channels-last formats are supported.
            text (`TextInput`, `PreTokenizedInput`, `List[TextInput]`, `List[PreTokenizedInput]`):
                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
        Nz,You have to specify at least images or text.tokenizer_init_kwargsr   zAInvalid input text. Please provide a string, or a list of stringsr   return_tensorsF)r   r?   aA  Expanding inputs for image tokens in InstructBLIP should be done in processing. Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your InstructBLIP model. Using processors without these attributes in the config is deprecated and will throw an error in v4.50.)tensor_type	input_idsqformer_input_idsattention_maskqformer_attention_maskr   )
ValueError_merge_kwargsr   r*   init_kwargsr   
isinstancestrlistpopr,   r/   contentlenziploggerwarning_oncer   updater+   r)   )r7   r:   r;   audiovideosr8   output_kwargsencodingr?   _text_encodingtext_encodingimage_tokensimage_token_encodingkimg_encodingtxt_encodingqformer_text_encodingimage_encodings                     r%   __call__zInstructBlipProcessor.__call__Y   s?   , >dlKLL***'
"&.."<"<
 
  >$$vd+JtAw4L !dee +=9==>NPTUN+T^^DfM-4PfaefN=KM-()9: $$0V5G "#//77$:O:OO'+~~!NSY.5Y] (6 ($ ( A ;>>RST>UWefgWh:i(6L, %|3(M!$ !/%''B *-^TMOOM*$:D$:$:4$`=Q^C_$`!,A,E,Ek,RH()1F1J1JK[1\H-.1T11&[M/<Z[NOON+3(s   1G*c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )r*   batch_decoder7   argsr8   s      r%   ra   z"InstructBlipProcessor.batch_decode   s     
 +t~~**D;F;;r$   c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        )r*   decoderb   s      r%   re   zInstructBlipProcessor.decode   s     
 %t~~$$d5f55r$   c                     | j                   j                  }| j                  j                  }t        t        j                  ||z               S N)r*   model_input_namesr)   rJ   dictfromkeys)r7   tokenizer_input_namesimage_processor_input_namess      r%   rh   z'InstructBlipProcessor.model_input_names   sA     !% @ @&*&:&:&L&L#DMM"7:U"UVWWr$   c                    t         j                  j                  |      rt        d| d      t        j                  |d       t         j                  j                  |d      }| j                  j                  |       d| j                  v }|r| j                  j                  d       t        |   |fi |}|r| xj                  dgz  c_        |S )NzProvided path (z#) should be a directory, not a fileT)exist_okr+   )ospathisfilerE   makedirsjoinr+   save_pretrained
attributesremover5   )r7   save_directoryr8   qformer_tokenizer_pathqformer_presentoutputsr9   s         r%   rt   z%InstructBlipProcessor.save_pretrained   s    77>>.)~.>>abcc
NT2!#n>Q!R../EF .@OO""#67').CFCOO 344Or$   c                     t        |   |fi |}t        |t              r|d   }t	        j                  |d      }||_        |S )Nr   r+   )	subfolder)r5   from_pretrainedrH   tupler   r+   )clspretrained_model_name_or_pathr8   	processorr+   r9   s        r%   r}   z%InstructBlipProcessor.from_pretrained   sP    G+,ITVT	 i'!!I)99:Wcvw&7	#r$   rg   )NNNN)r   r    r!   __doc__ru   valid_kwargsimage_processor_classtokenizer_classqformer_tokenizer_classr6   r   r   r   r   r   r
   r   r   r_   ra   re   propertyrh   rt   classmethodr}   __classcell__)r9   s   @r%   r(   r(   7   s    $ GJ&'L0%O-H "^bMM I0$y/4HYCZZ[M 45M 
M`<6 X X&  r$   r(   )r   ro   typingr   r   image_processing_utilsr   image_utilsr   processing_utilsr   r	   r
   tokenization_utils_baser   r   r   r   utilsr   autor   
get_loggerr   rO   r   r(   r#   r$   r%   <module>r      sa    
  2 % H H     
		H	%"2% "eN er$   