
    sg2                     |    d dl Z d dlZd dlZddlmZ ddlmZ ddlm	Z	 ddl
mZ  e	       rd dlmZ  G d	 d
e      Zy)    N   )AutoProcessor)VisionEncoderDecoderModel)is_vision_available   )PipelineTool)Imagec                   h     e Zd ZdZdZdZeZeZ	ddddddd	Z
dZ fd
ZdddefdZd Zd Z xZS )DocumentQuestionAnsweringToolz*naver-clova-ix/donut-base-finetuned-docvqaz}This is a tool that answers a question about an document (pdf). It returns a string that contains the answer to the question.document_qaimagezWThe image containing the information. Can be a PIL Image or a string path to the image.)typedescriptionstringzThe question in English)documentquestionc                 N    t               st        d      t        |   |i | y )NzBPillow must be installed to use the DocumentQuestionAnsweringTool.)r   
ValueErrorsuper__init__)selfargskwargs	__class__s      b/var/www/html/venv/lib/python3.12/site-packages/transformers/agents/document_question_answering.pyr   z&DocumentQuestionAnsweringTool.__init__0   s'    "$abb$)&)    r   r	   r   c                    d}|j                  d|      }| j                  j                  |dd      j                  }t	        |t
              r_t        j                  |      j                  d      }t        j                  |      j                  ddd	      }t        j                  |      }| j                  |d
      j                  }||dS )Nz9<s_docvqa><s_question>{user_input}</s_question><s_answer>z{user_input}Fpt)add_special_tokensreturn_tensorsRGBr   r   r   )r    )decoder_input_idspixel_values)replacepre_processor	tokenizer	input_ids
isinstancestrr	   openconvertnparray	transposetorch
from_numpyr#   )	r   r   r   task_promptpromptr"   img	img_arrayr#   s	            r   encodez$DocumentQuestionAnsweringTool.encode6   s    Q$$^X> ..88uT 9 

) 	 h$**X&..u5C//1a8I''	2H))(4)HUU%6UUr   c                    | j                   j                  |d   j                  | j                        |d   j                  | j                        | j                   j                  j
                  j                  d| j                  j                  j                  | j                  j                  j                  dd| j                  j                  j                  ggd
      j                  S )Nr#   r"   Tr   )	r"   
max_lengthearly_stoppingpad_token_ideos_token_id	use_cache	num_beamsbad_words_idsreturn_dict_in_generate)modelgeneratetodevicedecoderconfigmax_position_embeddingsr%   r&   r9   r:   unk_token_id	sequences)r   inputss     r   forwardz%DocumentQuestionAnsweringTool.forwardD   s    zz"">"%%dkk2$%89<<T[[Izz))00HH++55BB++55BB ..88EEFG$( # 
 )	r   c                    | j                   j                  |      d   }|j                  | j                   j                  j                  d      }|j                  | j                   j                  j
                  d      }t        j                  dd|d      j                         }| j                   j                  |      }|d   S )Nr    z<.*?>r   )countanswer)
r%   batch_decoder$   r&   	eos_token	pad_tokenresubstrip
token2json)r   outputssequences      r   decodez$DocumentQuestionAnsweringTool.decodeR   s    %%227;A>##D$6$6$@$@$J$JBO##D$6$6$@$@$J$JBO66(B:@@B%%00:!!r   )__name__
__module____qualname__default_checkpointr   namer   pre_processor_classr   model_classrH   output_typer   r)   r5   rI   rW   __classcell__)r   s   @r   r   r       sl    E RKD'+K t
 &6OPF K*Vw V# V"r   r   )rQ   numpyr,   r/   models.autor   models.vision_encoder_decoderr   utilsr   toolsr   PILr	   r    r   r   <module>rh      s3   " 
   ' E '  9"L 9"r   