
    =Ki                       S r SSKJr  SSKrSSKrSSKrSSKrSSKJr  SSK	J
r
Jr  SSKJr  SSKJrJrJrJrJrJr  SSKJrJrJrJrJr  SS	KJr  SS
KJr  SSKJ r   SSK!J"r"J#r#J$r$J%r%  \RL                  " \'5      r( " S S\5      r) " S S\SS9r* " S S\5      r+ " S S\SS9r, " S S5      r-\\+\,\.4   r/ " S S\5      r0\\0\.4   r1 " S S\-5      r2  S'S jr3Sr4S(S jr5 " S  S!5      r6    S)S" jr7    S*S# jr8    S+S$ jr9    S,S% jr:\\\\ RJ                     \\ RH                     /\\+\,4   4   \\;\ RJ                     \;\ RH                     /\\+\,4   4   4   r<S-S& jr=g).z?This module contains the evaluator classes for evaluating runs.    )annotationsN)abstractmethod)	AwaitableSequence)wraps)AnyCallableLiteralOptionalUnioncast)	BaseModel
ConfigDictFieldValidationErrormodel_validator)	TypedDictrun_helpers)schemas)
SCORE_TYPE
VALUE_TYPEExampleRunc                  0    \ rS rSr% SrS\S'    S\S'   Srg)	Category   z$A category for categorical feedback.Optional[Union[float, int]]valuestrlabel N__name__
__module____qualname____firstlineno____doc____annotations____static_attributes__r"       `/var/www/html/dynamic-report/venv/lib/python3.13/site-packages/langsmith/evaluation/evaluator.pyr   r      s    .&&CJ&r+   r   c                  H    \ rS rSr% SrS\S'    S\S'    S\S'    S\S	'   S
rg)FeedbackConfig(   zcConfiguration to define a type of feedback.

Applied on on the first creation of a `feedback_key`.
z0Literal['continuous', 'categorical', 'freeform']typer   minmaxz%Optional[list[Union[Category, dict]]]
categoriesr"   Nr#   r"   r+   r,   r.   r.   (   s*    
 ;:	$$;	$$A55r+   r.   F)totalc                      \ rS rSr% SrS\S'    SrS\S'    SrS\S	'    SrS
\S'    Sr	S\S'    Sr
S
\S'    \" \S9rS\S'    SrS\S'    SrS\S'    SrS\S'    SrS
\S'    \" SS9r\" SS9SS j5       rSrg)EvaluationResult7   zEvaluation result.r    keyNr   scorer   r   zOptional[dict]metadatazOptional[str]comment
correction)default_factorydictevaluator_infoz%Optional[Union[FeedbackConfig, dict]]feedback_configOptional[Union[uuid.UUID, str]]source_run_idtarget_run_idextraforbid)rD   after)modec                    U R                   cG  [        U R                  [        [        45      (       a"  [
        R                  SU R                   35        U $ )z:Warn when numeric values are passed via the `value` field.zJNumeric values should be provided in the 'score' field, not 'value'. Got: )r9   
isinstancer   intfloatloggerwarningselfs    r,   check_value_non_numeric(EvaluationResult.check_value_non_numericV   sH     ::*TZZ#u"F"FNN& r+   r"   )returnr6   )r$   r%   r&   r'   r(   r)   r9   r   r:   r;   r<   r   r>   r?   r@   rB   rC   rD   r   model_configr   rP   r*   r"   r+   r,   r6   r6   7   s    	H@E:0E:8#Hn#8!G]!2!%J%: 6ND65=AO:A;59M29659M29 !E> )H-L'" #r+   r6   c                  $    \ rS rSr% SrS\S'   Srg)EvaluationResultsa   zeBatch evaluation results.

This makes it easy for your evaluator to return multiple
metrics at once.
zlist[EvaluationResult]resultsr"   Nr#   r"   r+   r,   rU   rU   a   s     $#!r+   rU   c                  b    \ rS rSrSr\  S       SS jj5       r  S       SS jjrSrg)	RunEvaluatorl   zEvaluator interface class.Nc                    g)zEvaluate an example.Nr"   )rO   runexampleevaluator_run_ids       r,   evaluate_runRunEvaluator.evaluate_runo   s    r+   c                   ^ ^^^^#    [         R                  " 5       mUUUUU 4S jn[        R                  " 5       R	                  SU5      I Sh  vN $  N7f)z#Evaluate an example asynchronously.c                    > [         R                  " S0 T D6   TR                  TTT5      sS S S 5        $ ! , (       d  f       g = f)Nr"   )rhtracing_contextr_   )current_contextr^   r]   r\   rO   s   r,   _run_with_context5RunEvaluator.aevaluate_run.<locals>._run_with_context   s3    ##6o6((g7GH 766s	   5
AN)rc   get_tracing_contextasyncioget_running_looprun_in_executor)rO   r\   r]   r^   rf   re   s   ```` @r,   aevaluate_runRunEvaluator.aevaluate_runx   sG      002	I 	I --/??FWXXXXs   AAAAr"   NNr\   r   r]   Optional[Example]r^   Optional[uuid.UUID]rR   *Union[EvaluationResult, EvaluationResults])	r$   r%   r&   r'   r(   r   r_   rl   r*   r"   r+   r,   rY   rY   l   s    $ &*04	## ## .	#
 
4# # &*04	YY #Y .	Y
 
4Y Yr+   rY   c                  P    \ rS rSr% SrS\S'    S\S'    SrS\S	'    SrS
\S'   Srg)ComparisonEvaluationResult   zFeedback scores for the results of comparative evaluations.

These are generated by functions that compare two or more runs,
returning a ranking or other feedback.
r    r8   z'dict[Union[uuid.UUID, str], SCORE_TYPE]scoresNrA   rB   z6Optional[Union[str, dict[Union[uuid.UUID, str], str]]]r;   r"   )	r$   r%   r&   r'   r(   r)   rB   r;   r*   r"   r+   r,   rt   rt      s8     
H@33459M296FJGCJr+   rt   c                     ^  \ rS rSrSr S   SS jjr S       SS jjr      SS jr      SS jr\	SS j5       r
  S       SS jjr  S     SU 4S	 jjjr S     SS
 jjrSS jrSrU =r$ )DynamicRunEvaluator   a  A dynamic evaluator that wraps a function and transforms it into a `RunEvaluator`.

This class is designed to be used with the `@run_evaluator` decorator, allowing
functions that take a `Run` and an optional `Example` as arguments, and return
an `EvaluationResult` or `EvaluationResults`, to be used as instances of `RunEvaluator`.

Attributes:
    func (Callable): The function that is wrapped by this evaluator.
c                  ^ [        U5      u  nmU(       a  [        U5      u  nmS	U4S jjn[        U5      " U 5        SSKJn  Ub&  UR	                  X#S9U l        [        USS5      U l        [        R                  " U5      (       a5  Ub  [        S5      eUR	                  XS9U l        [        USS5      U l        gUR	                  [        [        [        [        [           /[         4   U5      US9U l        [        USS5      U l        g)
zInitialize the `DynamicRunEvaluator` with a given function.

Args:
    func (Callable): A function that takes a `Run` and an optional `Example` as
    arguments, and returns a dict or `ComparisonEvaluationResult`.
c                f   > Tc  U $ T" U R                  S5      U R                  S5      5      u    pU$ )Nr\   r]   getinputs_traced_inputsprepare_inputss      r,   process_inputs4DynamicRunEvaluator.__init__.<locals>.process_inputs   s<    %$2

5!6::i#8%!Q ! r+   r   r   Nr   r$   rx   Func was provided as a coroutine function, but afunc was also provided. If providing both, func should be a regular function to avoid ambiguity.r   r>   rR   r>   )_normalize_evaluator_funcr   	langsmithr   ensure_traceableafuncgetattr_nameinspectiscoroutinefunction	TypeErrorr   r	   r   r   r   _RUNNABLE_OUTPUTfuncrO   r   r   r   r   r   s        @r,   __init__DynamicRunEvaluator.__init__   s   ( ";4!@~&?&F#UN	! 	dD)$55 6 DJ !
4IJDJ&&t,, 3 
 %55 6 DJ !z3HIDJ#44XsHW$568HHI4P- 5 DI !z3HIDJr+   c                v  ^ [        T[        5      (       a  TR                  (       d  UTl        T$  T(       d  [        ST 35      eST;  a  U(       a  U R                  TS'   [        U4S jS 5       5      (       a  [        ST 35      e[        S0 SU0TED6$ ! [         a  n[        ST 35      UeS nAff = f)	NziExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got empty result: r8   c              3  ,   >#    U  H	  oT;  v   M     g 7fNr"   ).0kresults     r,   	<genexpr>@DynamicRunEvaluator._coerce_evaluation_result.<locals>.<genexpr>   s     J,IqF?,I   )r9   r   r;   zrExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score' or categorical 'value'; got rB   z[Expected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got r"   )rI   r6   rB   
ValueErrorr   allr   )rO   r   rB   allow_no_keyes    `   r,   _coerce_evaluation_result-DynamicRunEvaluator._coerce_evaluation_result   s     f.//'''4$M	 FFLXO  F"| $

uJ,IJJJ OOUhX  $Q&P&PQQ 	44:8= 	s   A'B 
B8$B33B8c                    SU;   aA  UR                  5       nUS    Vs/ s H  nU R                  XBS9PM     snUS'   [        S0 UD6$ U R                  [        [        U5      USS9$ s  snf )NrW   )rB   T)rB   r   r"   )copyr   rU   r   r>   )rO   rW   rB   cprs        r,   _coerce_evaluation_results.DynamicRunEvaluator._coerce_evaluation_results  s    
 B !++A ..q.N+ByM %*r**--w}4 . 
 	
s   A'c                    [        U[        5      (       a  UR                  (       d  X!l        U$ [        U5      nU R	                  X5      $ r   )rI   r6   rB   _format_evaluator_resultr   )rO   r   rB   s      r,   _format_result"DynamicRunEvaluator._format_result  sA     f.//'''4$M)&1..vEEr+   c                    [        U S5      $ zCheck if the evaluator function is asynchronous.

Returns:
    bool: `True` if the evaluator function is asynchronous, `False` otherwise.
r   hasattrrN   s    r,   is_asyncDynamicRunEvaluator.is_async$       tW%%r+   c                   [        U S5      (       dU  [        R                  " 5       nUR                  5       (       a  [	        S5      eUR                  U R                  X5      5      $ Uc  [        R                  " 5       nSUR                  0n[        USS5      (       a  [        UR                  5      US'   U R                  UUX5S.S9nU R                  Xc5      $ )	aU  Evaluate a run using the wrapped function.

This method directly invokes the wrapped function with the provided arguments.

Args:
    run (Run): The run to be evaluated.
    example (Optional[Example]): An optional example to be used in the evaluation.

Returns:
    Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
r   tCannot call `evaluate_run` on an async run evaluator from within an running event loop. Use `aevaluate_run` instead.NrC   
session_id
experimentrun_idr:   langsmith_extra)r   ri   get_event_loop
is_runningRuntimeErrorrun_until_completerl   uuiduuid4idr   r    r   r   r   )rO   r\   r]   r^   running_loopr:   r   s          r,   r_    DynamicRunEvaluator.evaluate_run-  s    " tV$$"113L&&(("R 
 $66t7I7I#7WXX##zz|$3SVV#<3d++%(%8H\"'7N  

 ""6<<r+   c                \  >#    [        U S5      (       d  [        TU ]	  X5      I Sh  vN $ Uc  [        R                  " 5       nSUR
                  0n[        USS5      (       a  [        UR                  5      US'   U R                  UUX4S.S9I Sh  vN nU R                  XS5      $  N N7f)a|  Evaluate a run asynchronously using the wrapped async function.

This method directly invokes the wrapped async function with the
    provided arguments.

Args:
    run (Run): The run to be evaluated.
    example (Optional[Example]): An optional example to be used
        in the evaluation.

Returns:
    Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
r   NrC   r   r   r   r   )r   superrl   r   r   r   r   r    r   r   r   )rO   r\   r]   r^   r:   r   	__class__s         r,   rl   !DynamicRunEvaluator.aevaluate_runS  s     & tW%%.s<<<##zz|$3SVV#<3d++%(%8H\"zz'7N " 
 

 ""6<< =
s"   $B,B(A*B,B*B,*B,c                $    U R                  X5      $ )a  Make the evaluator callable, allowing it to be used like a function.

This method enables the evaluator instance to be called directly, forwarding the
call to `evaluate_run`.

Args:
    run (Run): The run to be evaluated.
    example (Optional[Example]): An optional example to be used in the evaluation.

Returns:
    Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
)r_   )rO   r\   r]   s      r,   __call__DynamicRunEvaluator.__call__t  s       ..r+   c                "    SU R                    S3$ ))Represent the DynamicRunEvaluator object.z<DynamicRunEvaluator >r   rN   s    r,   __repr__DynamicRunEvaluator.__repr__  s    &tzzl!44r+   r   r   r   r   )r   XCallable[[Run, Optional[Example]], Union[_RUNNABLE_OUTPUT, Awaitable[_RUNNABLE_OUTPUT]]]r   zIOptional[Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]])F)r   zUnion[EvaluationResult, dict]rB   	uuid.UUIDr   boolrR   r6   )rW   zUnion[dict, EvaluationResults]rB   r   rR   rr   )r   zMUnion[EvaluationResult, EvaluationResults, dict, str, int, bool, float, list]rB   r   rR   rr   rR   r   rn   ro   )r\   r   r]   rp   r^   rq   )r\   r   r]   rp   rR   rr   rR   r    )r$   r%   r&   r'   r(   r   r   r   r   propertyr   r_   rl   r   r   r*   __classcell__)r   s   @r,   rx   rx      sE   , 8J
8J
8J| #	- ! 	
 
<
/
 !
 
4	
"F
F
 !F 
4F & & &*04	$=$= #$= .	$=
 
4$=R &*04	== #= .	= =D 6://!2/	3/"5 5r+   rx   c                    [        U 5      $ )zeCreate a run evaluator from a function.

Decorator that transforms a function into a `RunEvaluator`.
)rx   r   s    r,   run_evaluatorr     s     t$$r+   i'  c                `    [        U 5      n[        U5      [        :  a  US [        S-
   S-   nU$ )N   z...))reprlen_MAXSIZE)objss     r,   _maxsize_reprr     s1    S	A
1vn1&Hr+   c                      \ rS rSrSr S   SS jjr\SS j5       r S     SS jjr S     SS jjr	 S     SS jjr
SS	 jr\SS
 j5       r        SS jrSrg)DynamicComparisonRunEvaluatori  z4Compare predictions (as traces) from 2 or more runs.Nc                (  ^ [        U5      u  nmU(       a  [        U5      u  nmS	U4S jjn[        U5      " U 5        SSKJn  Ub&  UR	                  X#S9U l        [        USS5      U l        [        R                  " U5      (       a5  Ub  [        S5      eUR	                  XS9U l        [        USS5      U l        gUR	                  [        [        [        [           [        [            /["        4   U5      US9U l        [        USS5      U l        g)
zInitialize the `DynamicRunEvaluator` with a given function.

Args:
    func (Callable): A function that takes a `Run` and an optional `Example` as
    arguments, and returns an `EvaluationResult` or `EvaluationResults`.
c                f   > Tc  U $ T" U R                  S5      U R                  S5      5      u    pU$ )Nrunsr]   r|   r~   s      r,   r   >DynamicComparisonRunEvaluator.__init__.<locals>.process_inputs  s<    %$2

6"FJJy$9%!Q ! r+   r   r   Nr   r$   rx   r   r   )$_normalize_comparison_evaluator_funcr   r   r   r   r   r   r   r   r   r   r   r	   r   r   r   r   _COMPARISON_OUTPUTr   r   s        @r,   r   &DynamicComparisonRunEvaluator.__init__  s%   ( "Fd!K~&J5&Q#UN	! 	dD)$55 6 DJ !
4IJDJ&&t,, 3 
 %55 6 DJ !z3HIDJ#44!#(9:*,   . 5 	DI !z3HIDJr+   c                    [        U S5      $ r   r   rN   s    r,   r   &DynamicComparisonRunEvaluator.is_async  r   r+   c                d   [        U S5      (       dU  [        R                  " 5       nUR                  5       (       a  [	        S5      eUR                  U R                  X5      5      $ [        R                  " 5       nU R                  U5      nU R                  UUXES.S9nU R                  XdU5      $ )zCompare runs to score preferences.

Args:
    runs: A list of runs to compare.
    example: An optional example to be used in the evaluation.

r   r   r   tagsr   )r   ri   r   r   r   r   acompare_runsr   r   	_get_tagsr   _format_results)rO   r   r]   r   rB   r   r   s          r,   compare_runs*DynamicComparisonRunEvaluator.compare_runs  s     tV$$"113L&&(("R 
 $66&&t5  

~~d#'4C  

 ##F4@@r+   c                   #    [        U S5      (       d  U R                  X5      $ [        R                  " 5       nU R	                  U5      nU R                  UUX4S.S9I Sh  vN nU R                  XSU5      $  N7f)an  Evaluate a run asynchronously using the wrapped async function.

This method directly invokes the wrapped async function with the
    provided arguments.

Args:
    runs (Run): The runs to be evaluated.
    example (Optional[Example]): An optional example to be used
        in the evaluation.

Returns:
    ComparisonEvaluationResult: The result of the evaluation.
r   r   r   N)r   r   r   r   r   r   r   )rO   r   r]   rB   r   r   s         r,   r   +DynamicComparisonRunEvaluator.acompare_runs  s|       tW%%$$T33

~~d#zz'4C " 
 

 ##F4@@
s   AA:!A8"A:c                $    U R                  X5      $ )ay  Make the evaluator callable, allowing it to be used like a function.

This method enables the evaluator instance to be called directly, forwarding the
call to `evaluate_run`.

Args:
    run (Run): The run to be evaluated.
    example (Optional[Example]): An optional example to be used in the evaluation.

Returns:
    ComparisonEvaluationResult: The result of the evaluation.
)r   )rO   r   r]   s      r,   r   &DynamicComparisonRunEvaluator.__call__'  s       //r+   c                "    SU R                    S3$ )r   z<DynamicComparisonRunEvaluator r   r   rN   s    r,   r   &DynamicComparisonRunEvaluator.__repr__8  s    0A>>r+   c                    / nU  He  nUR                  S[        UR                  5      -   5        [        USS5      (       d  M>  UR                  S[        UR                  5      -   5        Mg     U$ )zExtract tags from runs.zrun:r   Nzexperiment:)appendr    r   r   r   )r   r   r\   s      r,   r   'DynamicComparisonRunEvaluator._get_tags<  s\     CKKSVV,-sL$//MC,??@  r+   c                   [        U[        5      (       a  UR                  (       d  X!l        U$ [        U[        5      (       a:  [	        X15       VVs0 s H  u  pEUR
                  U_M     snnU R                  US.nO<[        U[        5      (       a  SU;  a  U R                  US'   OSU< 3n[        U5      e [        S0 SU0UED6$ s  snnf ! [         a  n[        SU 35      UeS nAff = f)N)rv   r8   rB   r8   zXExpected 'dict', 'list' or 'ComparisonEvaluationResult' result object. Received: result=rB   zExpected a dictionary with a 'key' and dictionary of scores mappingrun IDs to numeric scores, or ComparisonEvaluationResult object, got r"   )
rI   rt   rB   listzipr   r   r>   r   r   )rO   r   rB   r   r\   r9   msgr   s           r,   r   -DynamicComparisonRunEvaluator._format_resultsG  s	    f899'''4$M%%;>t;LM;LZS3665=;LMzz!.F
 %%F" $

u-%+I/  S/!		- "M<V<  N"  	x! 		s   C
;C 
C.C))C.r   r   )r   fCallable[[Sequence[Run], Optional[Example]], Union[_COMPARISON_OUTPUT, Awaitable[_COMPARISON_OUTPUT]]]r   zUOptional[Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]]r   )r   Sequence[Run]r]   rp   rR   rt   r   )r   r	  rR   z	list[str])r   z-Union[dict, list, ComparisonEvaluationResult]rB   r   r   r	  rR   rt   )r$   r%   r&   r'   r(   r   r   r   r   r   r   r   staticmethodr   r   r*   r"   r+   r,   r   r     s    > >J
>J
>J@ & & AEA!A,=A	#A@ AEA!A,=A	#A: AE0!0,=0	#0"?  "=" !" 	"
 
$"r+   r   c                    [        U 5      $ )z.Create a comaprison evaluator from a function.)r   r   s    r,   comparison_evaluatorr  l  s     )..r+   c                
  ^ ^^	^
^ Sm[         R                  " T 5      m
T
R                  R                  5        VVs/ s H#  u  pUR                  UR
                  :w  d  M!  UPM%     nnnT
R                  R                  5        VVs/ s H0  u  pUR                  [         R                  R                  Ld  M.  UPM2     snnm	U(       aB  [        U	U4S jU 5       5      (       d8  [        U Vs/ s H  oDT	;  d  M
  UPM     sn5      S:w  a  ST S3n[        U5      e[        U	U4S jU 5       5      (       a  USS/:X  a  T S 4$ [         R                  " T 5      (       aO        SU
4S	 jjm      SUU 4S
 jjn[        T S5      (       a  [        T S5      OUR                  Ul        UT4$       SU
4S jjmSUU 4S jjn[        T S5      (       a  [        T S5      OUR                  Ul        UT4$ s  snnf s  snnf s  snf )N)r\   r]   r   outputsreference_outputsattachmentsc              3  D   >#    U  H  oT;   =(       d    UT;   v   M     g 7fr   r"   r   pnameargs_with_defaultssupported_argss     r,   r   ,_normalize_evaluator_func.<locals>.<genexpr>  &      
PXu^#Bu0B'BBPX       UInvalid evaluator function. Must have at least one argument. Supported arguments are . Please see https://docs.smith.langchain.com/evaluation/how_to_guides/evaluation/evaluate_llm_application#use-custom-evaluatorsc              3  D   >#    U  H  oT;   =(       d    UT;   v   M     g 7fr   r"   r  s     r,   r   r    %      LT5>5,>#>>Hr  r\   r]   c                  > U UU(       a  UR                   O0 U R                  =(       d    0 U(       a  UR                  =(       d    0 O0 U(       a  UR                  =(       d    0 O0 S.n0 n/ n0 nTR                  R	                  5        He  u  pgXb;   d  M  UR
                  UR                  UR                  4;   a  UR                  X&   5        OX&   X6'   US;   a  [        X&   5      OX&   XV'   Mg     XCU4$ N)r\   r]   r   r  r  r  )r\   r]   
r   r  r  
parametersitemskindPOSITIONAL_OR_KEYWORDPOSITIONAL_ONLYr  r   	r\   r]   arg_mapkwargsargsr   
param_nameparamsigs	           r,   _prepare_inputs2_normalize_evaluator_func.<locals>._prepare_inputs       &07gnnR"{{0b@G7#6#6#<"RBI)>Br  "),)=)=)?%J!, ::!77!11*  !KK(;<181DF.  *-?? *'*=>!(!4 &1 *@ ]22r+   c                F   >#    T" X5      u  p#nT" U0 UD6I S h  vN $  N7fr   r"   r\   r]   r)  r(  r   r-  r   s        r,   awrapper+_normalize_evaluator_func.<locals>.awrapper  s-      %4C$A!q!4262222   !!r$   c                  > U UU(       a  UR                   O0 U R                  =(       d    0 U(       a  UR                  =(       d    0 O0 U(       a  UR                  =(       d    0 O0 S.n0 n/ n0 nTR                  R	                  5        He  u  pgXb;   d  M  UR
                  UR                  UR                  4;   a  UR                  X&   5        OX&   X6'   US;   a  [        X&   5      OX&   XV'   Mg     XCU4$ r  r   r&  s	           r,   r-  r.    r/  r+   c                *   > T" X5      u  p#nT" U0 UD6$ r   r"   r1  s        r,   wrapper*_normalize_evaluator_func.<locals>.wrapper  s"    $3C$A!qT,V,,r+   )r\   r   r]   rp   rR   tuple[list, dict, dict])r\   r   r]   rp   rR   r   r   	signaturer!  r"  r#  VAR_KEYWORDdefault	Parameteremptyr   r   r   r   r   r   r$   r   r  pall_argsar  r2  r7  r-  r  r,  r  s   `       @@@@r,   r   r   v  s9   N 

D
!C&)nn&:&:&<X&<(%!--@W&<HX ,,..HE99G--333 	.
  
PX
 
 
 HDHq1C(CHDEJ11?0@ AFG 	 o  LT  	 
 Tz&&t,,33#43(3>33#43!3 3 4,, j)&& 
 o..33#43(3>- - 4,, j)%% 
 _--k Y Es#    G4G4-G:6G:+	H 8H c                  ^ ^^	^
^ Sm[         R                  " T 5      m
T
R                  R                  5        VVs/ s H#  u  pUR                  UR
                  :w  d  M!  UPM%     nnnT
R                  R                  5        VVs/ s H0  u  pUR                  [         R                  R                  Ld  M.  UPM2     snnm	U(       aB  [        U	U4S jU 5       5      (       d8  [        U Vs/ s H  oDT	;  d  M
  UPM     sn5      S:w  a  ST S3n[        U5      e[        U	U4S jU 5       5      (       a  USS/:X  a  T S 4$ [         R                  " T 5      (       aO        SU
4S	 jjm      SUU 4S
 jjn[        T S5      (       a  [        T S5      OUR                  Ul        UT4$       SU
4S jjm      SUU 4S jjn[        T S5      (       a  [        T S5      OUR                  Ul        UT4$ s  snnf s  snnf s  snf )Nr   r]   r   r  r  c              3  D   >#    U  H  oT;   =(       d    UT;   v   M     g 7fr   r"   r  s     r,   r   7_normalize_comparison_evaluator_func.<locals>.<genexpr>  r  r  r  r  r  c              3  D   >#    U  H  oT;   =(       d    UT;   v   M     g 7fr   r"   r  s     r,   r   rG     r  r  r   r]   c                  > U UU(       a  UR                   O0 U  Vs/ s H  o"R                  =(       d    0 PM     snU(       a  UR                  =(       d    0 O0 S.n0 n/ n0 nT	R                  R                  5        He  u  pxXs;   d  M  UR                  UR
                  UR                  4;   a  UR                  X7   5        OX7   XG'   US;   a  [        X7   5      OX7   Xg'   Mg     XTU4$ s  snf NrE  )r   r]   	r   r  r!  r"  r#  r$  r%  r  r   
r   r]   r\   r'  r(  r)  r   r*  r+  r,  s
            r,   r-  =_normalize_comparison_evaluator_func.<locals>._prepare_inputs*       !&07gnnR=ABTc 1r 1TBBI)>Br  "),)=)=)?%J!, ::!77!11*  !KK(;<181DF.  *-@@ *'*=>!(!4 &1 *@ ]22+  C   C)
c                F   >#    T" X5      u  p#nT" U0 UD6I S h  vN $  N7fr   r"   r   r]   r)  r(  r   r-  r   s        r,   r2  6_normalize_comparison_evaluator_func.<locals>.awrapperH  s-      %4D$B!q!4262222r4  r$   c                  > U UU(       a  UR                   O0 U  Vs/ s H  o"R                  =(       d    0 PM     snU(       a  UR                  =(       d    0 O0 S.n0 n/ n0 nT	R                  R                  5        He  u  pxXs;   d  M  UR                  UR
                  UR                  4;   a  UR                  X7   5        OX7   XG'   US;   a  [        X7   5      OX7   Xg'   Mg     XTU4$ s  snf rJ  rK  rL  s
            r,   r-  rM  W  rN  rO  c                *   > T" X5      u  p#nT" U0 UD6$ r   r"   rQ  s        r,   r7  5_normalize_comparison_evaluator_func.<locals>.wrapperu  s$     %4D$B!qT,V,,r+   )r   r	  r]   rp   rR   r9  )r   r	  r]   rp   rR   r   r:  r@  s   `       @@@@r,   r   r      sP    SN


D
!C&)nn&:&:&<X&<(%!--@W&<HX ,,..HE99G--333 	.
  
PX
 
 
 HDHq1C(CHDEJ11?0@ AFG 	 o  LT  	 
 Tz&&t,,3#3.?3(3<3#3.?3#3 3 4,, j)&& 
 _,,3#3.?3(3<-#-.?-#- - 4,, j)%% 
 O++k Y Es#    G:G:-H 6H +	H8Hc                |   [        U [        [        [        45      (       a  SU 0n U $ U (       d  [	        SU  35      e[        U [
        5      (       a,  [        S U  5       5      (       d  [	        SU  S35      eSU 0n U $ [        U [        5      (       a  SU 0n U $ [        U [        5      (       a   U $ [	        SU  35      e)	Nr9   zdExpected a non-empty dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got c              3  B   #    U  H  n[        U[        5      v   M     g 7fr   )rI   r>   )r   xs     r,   r   +_format_evaluator_result.<locals>.<genexpr>  s     71:a&&s   z8Expected a list of dicts or EvaluationResults. Received .rW   r   zZExpected a dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got )	rI   r   rK   rJ   r   r  r   r    r>   )r   s    r,   r   r     s     &4,--6"* M) ;;A(D
 	
 
FD	!	!7777J6(RST  V$ M 
FC	 	 6" M 
FD	!	! M	 &&,X/
 	
r+   c                  ^ ^^^	 Sm	[         R                  " T 5      mTR                  R                  5        VVs/ s H  u  pUPM	     nnnTR                  R                  5        VVs/ s H0  u  pUR                  [         R
                  R                  Ld  M.  UPM2     snnmU(       aB  [        UU	4S jU 5       5      (       dH  [        U Vs/ s H  oDT;  d  M
  UPM     sn5      S:w  a!  ST	 S3nU(       a	  USU S3-  n[        U5      e[        U	4S jU 5       5      (       a  USS	/:X  a  T $       SU U4S
 jjn[        T S5      (       a  [        T S5      Ul        U$ UR                  Ul        U$ s  snnf s  snnf s  snf )Nr   examplesr   r  r  c              3  D   >#    U  H  oT;   =(       d    UT;   v   M     g 7fr   r"   r  s     r,   r   /_normalize_summary_evaluator.<locals>.<genexpr>  r  r  r  r  rZ  z Received arguments c              3  ,   >#    U  H	  oT;   v   M     g 7fr   r"   )r   r  r  s     r,   r   r_    s     ?hU.(hr   r   r]  c           	     $  > U UU Vs/ s H  o"R                   PM     snU  Vs/ s H  o3R                  =(       d    0 PM     snU Vs/ s H  o"R                  =(       d    0 PM     snS.n0 n/ nTR                  R                  5        HM  u  pxXt;   d  M  UR                  UR
                  UR                  4;   a  UR                  XG   5        MG  XG   XW'   MO     T
" U0 UD6n	[        U	[        5      (       a  U	$ [        U	5      $ s  snf s  snf s  snf )Nr\  )r   r  r!  r"  r#  r$  r%  r  rI   r6   r   )r   r]  r]   r\   r'  r(  r)  r*  r+  r   r   r,  s             r,   r7  -_normalize_summary_evaluator.<locals>.wrapper  s    $9ABg>>B9=>#KK-2->KS%T8oo&;&;8%TG FD%(^^%9%9%;!
(zz33--&  G$78-4-@* &< 4*6*F&"233+F33) C>%Ts   DD
Dr$   )r   zSequence[schemas.Run]r]  zSequence[schemas.Example]rR   rr   )r   r;  r!  r"  r=  r>  r?  r   r   r   r   r   r$   )
r   r  rA  rB  rC  r  r7  r  r,  r  s
   `      @@@r,   _normalize_summary_evaluatorrc    s   SN


D
!C&)nn&:&:&<=&<(%&<H= ,,..HE99G--333 	.
  
PX
 
 
 HDHq1C(CHDEJ11?0@C 	 )(155Co ?h???8P D 	4'	43L	47	4 	4: *1z)B)BGD*% 	  IPHXHX 	 w > Es   E7(-E=E=	FF)r   r   )r   r   )r   r  rR   r   )r   r	   rR   ztuple[Union[Callable[[Run, Optional[Example]], _RUNNABLE_OUTPUT], Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]], Optional[Callable[..., dict]]])r   r	   rR   ztuple[Union[Callable[[Sequence[Run], Optional[Example]], _COMPARISON_OUTPUT], Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]], Optional[Callable[..., dict]]])r   z;Union[EvaluationResults, dict, str, int, bool, float, list]rR   zUnion[EvaluationResults, dict])r   r	   rR   SUMMARY_EVALUATOR_T)>r(   
__future__r   ri   r   loggingr   abcr   collections.abcr   r   	functoolsr   typingr   r	   r
   r   r   r   pydanticr   r   r   r   r   typing_extensionsr   r   r   rc   r   langsmith.schemasr   r   r   r   	getLoggerr$   rL   r   r.   r6   rU   rY   r>   r   rt   r   rx   r   r   r   r   r  r   r   r   r  rd  rc  r"   r+   r,   <module>ro     s   E "      /   T S ' '  B B			8	$'y '6Ye 6'y 'T"	 "Y Y8 )+<dBC  ( 5t;< e5, e5P	%	% I IX//
 #/G.
G.G.T@,
@,@,FG#8 	'++	 9: 112	4 	gkk	D12 112	4		 >r+   