
from typing import List, Optional, Union

from modelscope import get_logger
from modelscope.pipelines.accelerate.base import InferFramework
from modelscope.utils.import_utils import is_vllm_available

logger = get_logger()


class Vllm(InferFramework):

    def __init__(self,
                 model_id_or_dir: str,
                 dtype: str = 'auto',
                 quantization: str = None,
                 tensor_parallel_size: int = 1,
                 trust_remote_code: Optional[bool] = None):
        """
        Args:
            dtype: The dtype to use, support `auto`, `float16`, `bfloat16`, `float32`
            quantization: The quantization bit, default None means do not do any quantization.
            tensor_parallel_size: The tensor parallel size.
        """
        super().__init__(model_id_or_dir)
        if not is_vllm_available():
            raise ImportError(
                'Install vllm by `pip install vllm` before using vllm to accelerate inference'
            )
        from vllm import LLM
        # Fall back to float16 when the GPU cannot run bfloat16.
        if not self.check_gpu_compatibility(8) and dtype in ('bfloat16',
                                                             'auto'):
            dtype = 'float16'
        self.model = LLM(
            self.model_dir,
            trust_remote_code=trust_remote_code,
            tensor_parallel_size=tensor_parallel_size,
            dtype=dtype,
            quantization=quantization)

    def __call__(self, prompts: Union[List[str], List[List[int]]],
                 **kwargs) -> List[str]:
        """Generate tokens.
        Args:
            prompts(`Union[List[str], List[List[int]]]`):
                The string batch or the token list batch to input to the model.
            kwargs: Sampling parameters.
        """
        # Translate transformers-style generation kwargs into vllm ones.
        do_sample = kwargs.pop('do_sample', None)
        num_beam = kwargs.pop('num_beam', 1)
        max_length = kwargs.pop('max_length', None)
        max_new_tokens = kwargs.pop('max_new_tokens', None)
        if not do_sample and num_beam > 1:
            kwargs['use_beam_search'] = True
        if max_length:
            kwargs['max_tokens'] = max_length - len(prompts[0])
        if max_new_tokens:
            kwargs['max_tokens'] = max_new_tokens
        from vllm import SamplingParams
        sampling_params = SamplingParams(**kwargs)
        # Dispatch on whether the batch contains raw strings or token ids.
        if isinstance(prompts[0], str):
            return [
                output.outputs[0].text for output in self.model.generate(
                    prompts, sampling_params=sampling_params)
            ]
        else:
            return [
                output.outputs[0].text for output in self.model.generate(
                    prompt_token_ids=prompts,
                    sampling_params=sampling_params)
            ]

    def model_type_supported(self, model_type: str):
        return any([
            model in model_type.lower() for model in [
                'llama', 'baichuan', 'internlm', 'mistral', 'aquila', 'bloom',
                'falcon', 'gpt', 'mpt', 'opt', 'qwen'
            ]
        ])
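

# --- Usage sketch (illustrative only, not part of the original module) ---
# A minimal example of driving the wrapper above. The checkpoint path and the
# sampling arguments are assumptions; any local model directory supported by
# vLLM would work here.
if __name__ == '__main__':
    engine = Vllm(
        '/path/to/a/local/llama-style/checkpoint',  # hypothetical path
        dtype='auto',
        tensor_parallel_size=1,
        trust_remote_code=True)
    # max_new_tokens is translated to max_tokens in __call__; the remaining
    # kwargs (e.g. temperature) are forwarded to vllm.SamplingParams.
    replies = engine(['Hello, who are you?'],
                     max_new_tokens=64,
                     temperature=0.7)
    logger.info(replies[0])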