
    sg[N                         d dl Z d dlZd dlZddlmZmZmZmZ ddl	m
Z
mZmZmZmZ  e       rd dlZddlmZmZ  e       rd dlZddlmZmZ  G d d	e
      Z e ed
             G d de             Zy)    N   )add_end_docstringsis_tf_availableis_torch_availablerequires_backends   )ArgumentHandlerDatasetPipelinePipelineExceptionbuild_pipeline_init_args),MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES0MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES)/TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES3TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMESc                       e Zd ZdZddZy)%TableQuestionAnsweringArgumentHandlerzB
    Handles arguments for the TableQuestionAnsweringPipeline
    Nc                    t        | d       dd l}|t        d      |t        |t              r&|j                  d      |j                  d      |g}nt        |t              rt        |      dkD  rrt        d |D              st        dd |D               |d   j                  d      |d   j                  d      |}npt        d	|d   j                          d
      t        t        |t              st        |t        j                        r|S t        dt        |       d      ||dg}|D ]C  }t        |d   |j                        r|d   t        d      |j                  |d         |d<   E |S )Npandasr   z(Keyword argument `table` cannot be None.querytablec              3   <   K   | ]  }t        |t                y wN)
isinstancedict.0ds     b/var/www/html/venv/lib/python3.12/site-packages/transformers/pipelines/table_question_answering.py	<genexpr>zATableQuestionAnsweringArgumentHandler.__call__.<locals>.<genexpr>5   s     >1:a.>s   z:Keyword argument `table` should be a list of dict, but is c              3   2   K   | ]  }t        |        y wr   )typer   s     r   r    zATableQuestionAnsweringArgumentHandler.__call__.<locals>.<genexpr>7   s     UmbcVZ[\V]Ums   zIf keyword argument `table` is a list of dictionaries, each dictionary should have a `table` and `query` key, but only dictionary has keys z `table` and `query` keys.zZInvalid input. Keyword argument `table` should be either of type `dict` or `list`, but is ))r   r   zTable cannot be None.)r   r   
ValueErrorr   r   getlistlenallkeysr
   typesGeneratorTyper"   	DataFrame)selfr   r   kwargspdtqa_pipeline_inputstqa_pipeline_inputs          r   __call__z.TableQuestionAnsweringArgumentHandler.__call__%   s    	$)=GHH]%&599W+=+IeiiX_N`Nl',g#E4(SZ!^>>>$TUmglUmTno  8<<(4qg9N9Z*/'$JJOPQ(--/IZZtv  $E7)CzRWY^YlYlGm u+a) 
 .3U#C"D"5 	X092<<H%g.6$%<==.0ll;Mg;V.W"7+	X #"    )NN)__name__
__module____qualname____doc__r2    r3   r   r   r       s    -#r3   r   T)has_tokenizerc                   h     e Zd ZdZdZ e       f fd	Zd Zd Z fdZ	ddZ
ddZdd	Zd
 Z xZS )TableQuestionAnsweringPipelinea  
    Table Question Answering pipeline using a `ModelForTableQuestionAnswering`. This pipeline is only available in
    PyTorch.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> oracle = pipeline(model="google/tapas-base-finetuned-wtq")
    >>> table = {
    ...     "Repository": ["Transformers", "Datasets", "Tokenizers"],
    ...     "Stars": ["36542", "4512", "3934"],
    ...     "Contributors": ["651", "77", "34"],
    ...     "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
    ... }
    >>> oracle(query="How many stars does the transformers repository have?", table=table)
    {'answer': 'AVERAGE > 36542', 'coordinates': [(0, 1)], 'cells': ['36542'], 'aggregator': 'AVERAGE'}
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This tabular question answering pipeline can currently be loaded from [`pipeline`] using the following task
    identifier: `"table-question-answering"`.

    The models that this pipeline can use are models that have been fine-tuned on a tabular question answering task.
    See the up-to-date list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=table-question-answering).
    ztable,queryc                 ,   t        |   |i | || _        | j                  dk(  r*t	        j
                         }|j                  t               n)t        j
                         }|j                  t               | j                  |       t        t        | j                  j                  dd             xr* t        t        | j                  j                  dd             | _        t!        | j                  j                  d      rd| _        y d | _        y )Ntfaggregation_labelsnum_aggregation_labelstapas)super__init___args_parser	frameworkr   copyupdater   r   r   check_model_typeboolgetattrmodelconfig	aggregatehasattrr"   )r-   args_parserargsr.   mapping	__class__s        r   rB   z'TableQuestionAnsweringPipeline.__init__w   s    $)&)'>>T!INNPGNNJKFKKMGNNGHg&gdjj&7&79MtTU 
Z^DJJ%%'?F[
  'tzz'8'8:NOG	UY	r3   c                 &     | j                   di |S )Nr8   )rJ   )r-   inputss     r   batch_inferencez.TableQuestionAnsweringPipeline.batch_inference   s    tzz#F##r3   c           	      @   | j                   dk(  rg }g }d}|d   j                  d   }|d   j                  | j                        }|d   j                  | j                        }|d   j                  | j                        }d}	t	        |      D ]+  }
|'|	dddf   }t        j                  |j                         j                               }||
   }	t	        |j                  d         D ]{  }|	dddf   j                         |   }|	dddf   j                         |   dz
  }|	ddd	f   j                         |   dz
  }|dk\  s]|dk\  sc|dk(  sit        |||f         ||<   } t        j                  |      j                  t        j                        j                  | j                        |	dddf<   ||
   }||
   }||
   }	| j                  |j!                  d      |j!                  d      |	j!                  d      
      }|j"                  }| j$                  r|j'                  |j(                         |j'                  |       t        j*                  j-                  |      }|j.                  |j                  t        j0                        j                  |j.                  j                        z  }t3        j4                  t6              }t9        |j;                         j                               D ]  \  }}|	dddf   j                         |   }|	dddf   j                         |   dz
  }|	ddd	f   j                         |   dz
  }|dk\  s`|dk\  sf|dk(  sl|||f   j'                  |        |D ci c],  }|t        j<                  ||         j?                         dkD  . }}. t        j@                  tC        |      d      }| j$                  s|fS |t        j@                  tC        |      d      fS g }g }d}|d   j                  d   }|d   }|d   }|d   j                         }d}	t	        |      D ]  }
||	dddf   }t        j                  |t
        jD                        }||
   }	t	        |j                  d         D ]{  }|	dddf   j                         |   }|	dddf   j                         |   dz
  }|	ddd	f   j                         |   dz
  }|dk\  s]|dk\  sc|dk(  sit        |||f         ||<   } ||	dddf<   ||
   }||
   }||
   }	| j                  t        jF                  |d      t        jF                  |d      t        jF                  |	d      
      }|j"                  }| j$                  r|j'                  |j(                         |j'                  |       tH        jJ                  jM                  tI        jN                  |tH        j0                              tI        jN                  |tH        j0                        z  }t3        j4                  t6              }|	}	t9        tI        j:                  |      j                         j                               D ]  \  }}|	dddf   j                         |   }|	dddf   j                         |   dz
  }|	ddd	f   j                         |   dz
  }|dk\  s`|dk\  sf|dk(  sl|||f   j'                  |        |D ci c],  }|t        j<                  ||         j?                         dkD  . }} tI        jP                  tC        |      d      }| j$                  s|fS |tI        jP                  tC        |      d      fS c c}w c c}w )z
        Inference used for models that need to process sequences in a sequential fashion, like the SQA models which
        handle conversational query related to a table.
        ptN	input_idsr   attention_masktoken_type_ids   r   r   )rW   rX   rY   )logitsg      ?)dtype)axis))rD   shapetodevicerangenp
zeros_likecpunumpytolistinttorch
from_numpyr"   longrJ   	unsqueezer[   rL   appendlogits_aggregationdistributions	Bernoulliprobsfloat32collectionsdefaultdictr&   	enumeratesqueezearraymeancattupleint32expand_dimsr=   mathsigmoidcastconcat)r-   rS   
all_logitsall_aggregationsprev_answers
batch_sizerW   rX   rY   token_type_ids_exampleindexprev_labels_examplemodel_labelsi
segment_idcol_idrow_idinput_ids_exampleattention_mask_exampleoutputsr[   dist_per_tokenprobabilitiescoords_to_probspcolrowkeylogits_batchs                                r   sequential_inferencez3TableQuestionAnsweringPipeline.sequential_inference   s   
 >>T!J!L,2215J{+..t{{;I#$4588EN#$4588EN%)"z* .m  +*@A*F'#%==1D1H1H1J1P1P1R#SL-;E-B*"<#5#5a#89 R%;AqD%A%H%H%J1%M
!71!=!D!D!Fq!IA!M!71!=!D!D!Fq!IA!M!Q;6Q;:?.1,?O2P.QLOR 493C3CL3Q3V3VW\WaWa3b3e3efjfqfq3r*1a40$-e$4!)7)>&)7)>&**/99!<#9#C#CA#F#9#C#CA#F % 
 !>>$++G,F,FG!!&)!&!4!4!>!>f!>!M . 4 47M7R7RSXS`S`7a7d7d"((//8 ! #."9"9$"?%m&;&;&=&D&D&FG >DAq!71!=!D!D!Fq!IJ0A6==?BQFC0A6==?BQFCaxC1Hq'c
3::1=> ]llUXRXXoc.B%C%H%H%JS%P Pll].m` !99U:%6:L*...L?s|UYYW\]mWnpqMr>ssJ!L,2215J{+I#$45N#$45;;=N%)"z* .m  +*@A*F'#%==1DBHH#UL-;E-B*"<#5#5a#89 R%;AqD%A%H%H%J1%M
!71!=!D!D!Fq!IA!M!71!=!D!D!Fq!IA!M!Q;6Q;:?.1,?O2P.QLOR 4@*1a40$-e$4!)7)>&)7)>&** nn->QG#%>>2Hq#Q#%>>2Hq#Q % 
 !>>$++G,F,FG!!&) "

0K Lrww*BJJP ! #."9"9$"?)?&%bjj&?&E&E&G&N&N&PQ >DAq!71!=!D!D!Fq!IJ0A6==?BQFC0A6==?BQFCaxC1Hq'c
3::1=> ]llUXRXXoc.B%C%H%H%JS%P Pll].m` 99U:%6:L*...L?s|RYYW\]mWnpqMr>ssE  m|  ms   	1^1^c                 r     | j                   |i |}t        |   |fi |}t        |      dk(  r|d   S |S )a  
        Answers queries according to a table. The pipeline accepts several types of inputs which are detailed below:

        - `pipeline(table, query)`
        - `pipeline(table, [query])`
        - `pipeline(table=table, query=query)`
        - `pipeline(table=table, query=[query])`
        - `pipeline({"table": table, "query": query})`
        - `pipeline({"table": table, "query": [query]})`
        - `pipeline([{"table": table, "query": query}, {"table": table, "query": query}])`

        The `table` argument should be a dict or a DataFrame built from that dict, containing the whole table:

        Example:

        ```python
        data = {
            "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
            "age": ["56", "45", "59"],
            "number of movies": ["87", "53", "69"],
            "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
        }
        ```

        This dictionary can be passed in as such, or can be converted to a pandas DataFrame:

        Example:

        ```python
        import pandas as pd

        table = pd.DataFrame.from_dict(data)
        ```

        Args:
            table (`pd.DataFrame` or `Dict`):
                Pandas DataFrame or dictionary that will be converted to a DataFrame containing all the table values.
                See above for an example of dictionary.
            query (`str` or `List[str]`):
                Query or list of queries that will be sent to the model alongside the table.
            sequential (`bool`, *optional*, defaults to `False`):
                Whether to do inference sequentially or as a batch. Batching is faster, but models like SQA require the
                inference to be done sequentially to extract relations within sequences, given their conversational
                nature.
            padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
                Activates and controls padding. Accepts the following values:

                - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
                  sequence if provided).
                - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
                  acceptable input length for the model if that argument is not provided.
                - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
                  lengths).

            truncation (`bool`, `str` or [`TapasTruncationStrategy`], *optional*, defaults to `False`):
                Activates and controls truncation. Accepts the following values:

                - `True` or `'drop_rows_to_fit'`: Truncate to a maximum length specified with the argument `max_length`
                  or to the maximum acceptable input length for the model if that argument is not provided. This will
                  truncate row by row, removing rows from the table.
                - `False` or `'do_not_truncate'` (default): No truncation (i.e., can output batch with sequence lengths
                  greater than the model maximum admissible input size).


        Return:
            A dictionary or a list of dictionaries containing results: Each result is a dictionary with the following
            keys:

            - **answer** (`str`) -- The answer of the query given the table. If there is an aggregator, the answer will
              be preceded by `AGGREGATOR >`.
            - **coordinates** (`List[Tuple[int, int]]`) -- Coordinates of the cells of the answers.
            - **cells** (`List[str]`) -- List of strings made up of the answer cell values.
            - **aggregator** (`str`) -- If the model has an aggregator, this returns the aggregator.
        r   r   )rC   rA   r2   r'   )r-   rO   r.   pipeline_inputsresultsrQ   s        r   r2   z'TableQuestionAnsweringPipeline.__call__  sL    V ,$++T<V<'"?=f=w<11:r3   c                 >    i }|||d<   |||d<   i }|||d<   ||i fS )Npadding
truncation
sequentialr8   )r-   r   r   r   r.   preprocess_paramsforward_paramss          r   _sanitize_parametersz3TableQuestionAnsweringPipeline._sanitize_parameters_  sL    +2i(!.8l+!+5N<( ."44r3   c                     || j                   dk(  rd}nd}|d   |d   }}|j                  rt        d      ||dk(  rt        d      | j                  ||| j                  ||	      }||d<   |S )
Nr@   drop_rows_to_fitdo_not_truncater   r   ztable is empty zquery is empty)return_tensorsr   r   )r"   emptyr$   	tokenizerrD   )r-   pipeline_inputr   r   r   r   r   rS   s           r   
preprocessz)TableQuestionAnsweringPipeline.preprocessk  s    yyG#/
.
%g.w0Gu;;-..=ERK-..uT^^Xblstwr3   c                    |j                  d      }| j                  dk(  r(|r | j                  di |}nD | j                  di |}n1d|vr| j                  |d<    | j
                  j                  di ||}|||d}|S )Nr   r@   generation_config)model_inputsr   r   r8   )popr"   r   rT   r   rJ   generate)r-   r   r   generate_kwargsr   r   model_outputss          r   _forwardz'TableQuestionAnsweringPipeline._forward{  s      )993$33ClC.$..>> #/97;7M7M 34)djj))LLLOLG)5RYZr3   c                 >   |d   }|d   }|d   }| j                   dk(  r| j                  r|d d \  }}| j                  j                  |||      }|\  }}	t	        |	      D 
ci c])  \  }
}|
| j
                  j                  j                  |   + }}
}| j
                  j                  j                  }t	        |	      D 
ci c]  \  }
}||k7  s|
||
   dz    }}
}n*|d   }| j                  j                  ||      }|d   }i }i }g }t	        |      D ]  \  }}|D cg c]  }|j                  |    }}|j                  |d      }|j                  |d      }|d	j                  |      z   ||D cg c]  }|j                  |    c}d
}|r||d<   |j                  |        t              dk(  r7t        d      | j                  j                  |d      D cg c]  }d|i }}t        |      dkD  r|S |d   S c c}}
w c c}}
w c c}w c c}w c c}w )Nr   r   r   r@   r   z > r   r   z, )answercoordinatescells
aggregatorzEmpty answerT)skip_special_tokensr   r   )r"   rL   r   convert_logits_to_predictionsrt   rJ   rK   r>   no_aggregation_label_indexiatr%   joinrl   r'   r   batch_decode)r-   r   rS   r   r   r[   
logits_aggpredictionsanswer_coordinates_batchagg_predictionsr   predaggregatorsno_agg_label_indexaggregators_prefixanswersr   r   
coordinater   r   aggregator_prefixr   s                          r   postprocessz*TableQuestionAnsweringPipeline.postprocess  sS   ~.g&	*99~~%,Ra["
"nnJJ6SY[ef<G9(/\efu\vwQXQRTXq$**"3"3"F"Ft"LLww%)ZZ%6%6%Q%Q"=F=W&29!T[_cu[uA{1~--&" & !"nnJJ6SYZ+6q>( %'"G&/0H&I '"{ALM::.MM(__UB7
$6$:$:5"$E!/$))E2BB#.FQR
eii
3R
 +5F<(v&' 6{a'778<8S8ST[qu8S8vwf&)wGwg,*w:
:? x& N S xs$   $.HH
H
!H5H
"H)NNN)NTN)F)r4   r5   r6   r7   default_input_namesr   rB   rT   r   r2   r   r   r   r   __classcell__)rQ   s   @r   r;   r;   U   sD    < (#H#J Z"$@tDPd
5 "(;r3   r;   )rr   r*   re   rb   utilsr   r   r   r   baser	   r
   r   r   r   rh   models.auto.modeling_autor   r   
tensorflowr=   models.auto.modeling_tf_autor   r   r   r;   r8   r3   r   <module>r      sx        b a 
 2#O 2#j ,4@A^;X ^; B^;r3   