o
    Yi|^                     @   s  d dl Zd dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZ d,ddZdd	 Zd
d Zdd Zdd Zdd Zdd Zdd Z dd Z!dd Z"dd Z#dd Z$dd Z%d d! Z&d"d# Z'd$d% Z(d&d' Z)d(d) Z*d*d+ Z+dS )-    N)assert_allclosefmin)CensoredDatabetacauchychi2expongammagumbel_lgumbel_rinvgauss
invweibulllaplacelogisticlognormnctncx2normweibull_maxweibull_min c                 C   s   t | |||dddS )Ng-q=)argsdispxtolftolr   )funcx0r   r   r   r   U/tmp/pip-target-1s0edx8b/lib/python/scipy/stats/tests/test_continuous_fit_censored.py	optimizer   s   r   c                  C   st   t ddgddgddgddggd	} tj| d
dtd\}}}}t|ddd t|ddd |d
ks2J |dks8J dS )a  
    Test fitting beta shape parameters to interval-censored data.

    Calculation in R:

    > library(fitdistrplus)
    > data <- data.frame(left=c(0.10, 0.50, 0.75, 0.80),
    +                    right=c(0.20, 0.55, 0.90, 0.95))
    > result = fitdistcens(data, 'beta', control=list(reltol=1e-14))

    > result
    Fitting of the distribution ' beta ' on censored data by maximum likelihood
    Parameters:
           estimate
    shape1 1.419941
    shape2 1.027066
    > result$sd
       shape1    shape2
    0.9914177 0.6866565
    皙?皙?      ?皙?      ??皙?ffffff?intervalr      flocfscaler   g?h㈵>rtolg*Vn?N)r   r   fitr   r   )dataablocscaler   r   r   	test_beta   s   r7   c                  C   sD   t ddgdgd} tj| td\}}t|ddd t|d	dd d
S )a  
    Test fitting the Cauchy distribution to right-censored data.

    Calculation in R, with two values not censored [1, 10] and
    one right-censored value [30].

    > library(fitdistrplus)
    > data <- data.frame(left=c(1, 10, 30), right=c(1, 10, NA))
    > result = fitdistcens(data, 'cauchy', control=list(reltol=1e-14))
    > result
    Fitting of the distribution ' cauchy ' on censored data by maximum
    likelihood
    Parameters:
             estimate
    location 7.100001
    scale    7.455866
    r*   
      )
uncensoredrightr   g}if@r.   r/   gne@Nr   r   r1   r   r   r2   r5   r6   r   r   r   test_cauchy_right_censored5   s   r?   c                  C   sP   t ddgdgdgddggd} tj| td\}}t|dd	d
 t|dd	d
 dS )a\  
    Test fitting the Cauchy distribution to data with mixed censoring.

    Calculation in R, with:
    * two values not censored [1, 10],
    * one left-censored [1],
    * one right-censored [30], and
    * one interval-censored [[4, 8]].

    > library(fitdistrplus)
    > data <- data.frame(left=c(NA, 1, 4, 10, 30), right=c(1, 1, 8, 10, NA))
    > result = fitdistcens(data, 'cauchy', control=list(reltol=1e-14))
    > result
    Fitting of the distribution ' cauchy ' on censored data by maximum
    likelihood
    Parameters:
             estimate
    location 4.605150
    scale    5.900852
    r*   r8   r9         r:   leftr;   r)   r<   gqk@r.   r/   g%Zx@Nr=   r>   r   r   r   test_cauchy_mixedM   s   rD   c                  C   s`   t ddgdgdgddggd} tj| ddtd\}}}t|d	d
d |dks(J |dks.J dS )a=  
    Test fitting just the shape parameter (df) of chi2 to mixed data.

    Calculation in R, with:
    * two values not censored [1, 10],
    * one left-censored [1],
    * one right-censored [30], and
    * one interval-censored [[4, 8]].

    > library(fitdistrplus)
    > data <- data.frame(left=c(NA, 1, 4, 10, 30), right=c(1, 1, 8, 10, NA))
    > result = fitdistcens(data, 'chisq', control=list(reltol=1e-14))
    > result
    Fitting of the distribution ' chisq ' on censored data by maximum
    likelihood
    Parameters:
             estimate
    df 5.060329
    r*   r8   r9   r@   rA   rB   r   r+   ge =@r.   r/   N)r   r   r1   r   r   )r2   dfr5   r6   r   r   r   test_chi2_mixedi   s   rF   c                  C   s   g d} dgd dgd  }t | |}tj|dtd\}}|dks$J t||  }|j |j	  }|| }t
||d d	S )
a  
    For the exponential distribution with loc=0, the exact solution for
    fitting n uncensored points x[0]...x[n-1] and m right-censored points
    x[n]..x[n+m-1] is

        scale = sum(x)/n

    That is, divide the sum of all the values (not censored and
    right-censored) by the number of uncensored values.  (See, for example,
    https://en.wikipedia.org/wiki/Censoring_(statistics)#Likelihood.)

    The second derivative of the log-likelihood function is

        n/scale**2 - 2*sum(x)/scale**3

    from which the estimate of the standard error can be computed.

    -----

    Calculation in R, for reference only. The R results are not
    used in the test.

    > library(fitdistrplus)
    > dexps <- function(x, scale) {
    +     return(dexp(x, 1/scale))
    + }
    > pexps <- function(q, scale) {
    +     return(pexp(q, 1/scale))
    + }
    > left <- c(1, 2.5, 3, 6, 7.5, 10, 12, 12, 14.5, 15,
    +                                     16, 16, 20, 20, 21, 22)
    > right <- c(1, 2.5, 3, 6, 7.5, 10, 12, 12, 14.5, 15,
    +                                     NA, NA, NA, NA, NA, NA)
    > result = fitdistcens(data, 'exps', start=list(scale=mean(data$left)),
    +                      control=list(reltol=1e-14))
    > result
    Fitting of the distribution ' exps ' on censored data by maximum likelihood
    Parameters:
          estimate
    scale    19.85
    > result$sd
       scale
    6.277119
    )r*         @      g      @r8      rJ   g      -@      rL      rM         Fr8   TrI   r   r,   r   g:0yE>N)r   right_censoredr	   r1   r   lennum_censored_uncensoredsum_rightr   )obscensr2   r5   r6   ntotalexpectedr   r   r   test_expon_right_censored   s   .r\   c                  C   s^   t g ddgd dg } tj| dtd\}}}t|ddd |dks&J t|d	dd d
S )a  
    Fit gamma shape and scale to data with one right-censored value.

    Calculation in R:

    > library(fitdistrplus)
    > data <- data.frame(left=c(2.5, 2.9, 3.8, 9.1, 9.3, 12.0, 23.0, 25.0),
    +                    right=c(2.5, 2.9, 3.8, 9.1, 9.3, 12.0, 23.0, NA))
    > result = fitdistcens(data, 'gamma', start=list(shape=1, scale=10),
    +                      control=list(reltol=1e-13))
    > result
    Fitting of the distribution ' gamma ' on censored data by maximum
      likelihood
    Parameters:
          estimate
    shape 1.447623
    scale 8.360197
    > result$sd
        shape     scale
    0.7053086 5.1016531
    )rG   g333333@gffffff@g333333"@g"@g      (@g      7@g      9@r      r*   rP   gv)?r.   r/   g<k @N)r   rQ   r
   r1   r   r   )xr3   r5   r6   r   r   r   test_gamma_right_censored   s   
r_   c            	      C   s   t g d} t ddg}t ddgg}t| ||d}tj|td\}}t|ddd	 t|d
dd	 t|  | |dddddf  d}tj|td\}}t|ddd	 t|d
dd	 dS )aB  
    Fit gumbel_l and gumbel_r to censored data.

    This R calculation should match gumbel_r.

    > library(evd)
    > library(fitdistrplus)
    > data = data.frame(left=c(0, 2, 3, 9, 10, 10),
    +                   right=c(1, 2, 3, 9, NA, NA))
    > result = fitdistcens(data, 'gumbel',
    +                      control=list(reltol=1e-14),
    +                      start=list(loc=4, scale=5))
    > result
    Fitting of the distribution ' gumbel ' on censored data by maximum
    likelihood
    Parameters:
          estimate
    loc   4.487853
    scale 4.843640
       rH   	   r8   r   r*   )r;   r)   r<   g@r.   r/   gc*_@N)rC   r)   g)nparrayr   r   r1   r   r   r   )	r:   r;   r)   r2   r5   r6   data2loc2scale2r   r   r   test_gumbel   s   
ri   c                  C   s   g d} t | dgdgd}tj|ddtd\}}}t|dd	d
 |dks&J |dks,J tj|dtd\}}}t|dd	d
 |dksDJ t|dd	d
 dS )a  
    Fit just the shape parameter of invgauss to data with one value
    left-censored and one value right-censored.

    Calculation in R; using a fixed dispersion parameter amounts to fixing
    the scale to be 1.

    > library(statmod)
    > library(fitdistrplus)
    > left <- c(NA, 0.4813096, 0.5571880, 0.5132463, 0.3801414, 0.5904386,
    +           0.4822340, 0.3478597, 3, 0.7191797, 1.5810902, 0.4442299)
    > right <- c(0.15, 0.4813096, 0.5571880, 0.5132463, 0.3801414, 0.5904386,
    +            0.4822340, 0.3478597, NA, 0.7191797, 1.5810902, 0.4442299)
    > data <- data.frame(left=left, right=right)
    > result = fitdistcens(data, 'invgauss', control=list(reltol=1e-12),
    +                      fix.arg=list(dispersion=1), start=list(mean=3))
    > result
    Fitting of the distribution ' invgauss ' on censored data by maximum
      likelihood
    Parameters:
         estimate
    mean 0.853469
    Fixed parameters:
               value
    dispersion     1
    > result$sd
        mean
    0.247636

    Here's the R calculation with the dispersion as a free parameter to
    be fit.

    > result = fitdistcens(data, 'invgauss', control=list(reltol=1e-12),
    +                      start=list(mean=3, dispersion=1))
    > result
    Fitting of the distribution ' invgauss ' on censored data by maximum
    likelihood
    Parameters:
                estimate
    mean       0.8699819
    dispersion 1.2261362

    The parametrization of the inverse Gaussian distribution in the
    `statmod` package is not the same as in SciPy (see
        https://arxiv.org/abs/1603.06687
    for details).  The translation from R to SciPy is

        scale = 1/dispersion
        mu    = mean * dispersion

    > 1/result$estimate['dispersion']  # 1/dispersion
    dispersion
     0.8155701
    > result$estimate['mean'] * result$estimate['dispersion']
        mean
    1.066716

    Those last two values are the SciPy scale and shape parameters.
    )
g?g ({?gL`)l?g6<T?gy}?g;?g*TUC?gNn%?gj<%L?gC?Cn?g333333?rH   r:   rC   r;   r   r*   r+   g\d8O?g-C6
?r/   rP   gBD?gcw&?N)r   r   r1   r   r   )r^   r2   mur5   r6   r   r   r   test_invgauss  s   =rl   c                  C   s^   t g dddgddggd} tj| dtd\}}}t|ddd	 |dks&J t|d
dd	 dS )a  
    Fit invweibull to censored data.

    Here is the calculation in R.  The 'frechet' distribution from the evd
    package matches SciPy's invweibull distribution.  The `loc` parameter
    is fixed at 0.

    > library(evd)
    > library(fitdistrplus)
    > data = data.frame(left=c(0, 2, 3, 9, 10, 10),
    +                   right=c(1, 2, 3, 9, NA, NA))
    > result = fitdistcens(data, 'frechet',
    +                      control=list(reltol=1e-14),
    +                      start=list(loc=4, scale=5))
    > result
    Fitting of the distribution ' frechet ' on censored data by maximum
    likelihood
    Parameters:
           estimate
    scale 2.7902200
    shape 0.6379845
    Fixed parameters:
        value
    loc     0
    r`   r8   r   r*   r:   r;   r)   rP   gp[[x^j?r.   r/   g5)^R@N)r   r   r1   r   r   )r2   cr5   r6   r   r   r   test_invweibull^  s   ro   c                  C   s~   t g d} | | dk| dk@  }| | dk }| | dk }t|||d}tj|ddtd\}}t|dd	d
 t|dd	d
 dS )a  
    Fir the Laplace distribution to left- and right-censored data.

    Calculation in R:

    > library(fitdistrplus)
    > dlaplace <- function(x, location=0, scale=1) {
    +     return(0.5*exp(-abs((x - location)/scale))/scale)
    + }
    > plaplace <- function(q, location=0, scale=1) {
    +     z <- (q - location)/scale
    +     s <- sign(z)
    +     f <- -s*0.5*exp(-abs(z)) + (s+1)/2
    +     return(f)
    + }
    > left <- c(NA, -41.564, 50.0, 15.7384, 50.0, 10.0452, -2.0684,
    +           -19.5399, 50.0,   9.0005, 27.1227, 4.3113, -3.7372,
    +           25.3111, 14.7987,  34.0887,  50.0, 42.8496, 18.5862,
    +           32.8921, 9.0448, -27.4591, NA, 19.5083, -9.7199)
    > right <- c(-50.0, -41.564,  NA, 15.7384, NA, 10.0452, -2.0684,
    +            -19.5399, NA, 9.0005, 27.1227, 4.3113, -3.7372,
    +            25.3111, 14.7987, 34.0887, NA,  42.8496, 18.5862,
    +            32.8921, 9.0448, -27.4591, -50.0, 19.5083, -9.7199)
    > data <- data.frame(left=left, right=right)
    > result <- fitdistcens(data, 'laplace', start=list(location=10, scale=10),
    +                       control=list(reltol=1e-13))
    > result
    Fitting of the distribution ' laplace ' on censored data by maximum
      likelihood
    Parameters:
             estimate
    location 14.79870
    scale    30.93601
    > result$sd
         location     scale
    0.1758864 7.0972125
    )      Igx&1D      I@gz/@rq   gSt$$@g_L gC63rq   gK7A "@g8gDi;@g\m>@gg?O9@b4-@gޓZA@rq   g?W[lE@gK42@g|a2U0r@@g"@gݓu;rp   g3@gǘp#rp   2   rq   rj   r8   r5   r6   r   rr   r.   r/   gY>@N)rd   re   r   r   r1   r   r   )rW   r^   rC   r;   r2   r5   r6   r   r   r   test_laplace  s   'ru   c                  C   sR   t g d} tj| | dkd}tj|td\}}t|ddd t|dd	d d
S )a  
    Fit the logistic distribution to left-censored data.

    Calculation in R:
    > library(fitdistrplus)
    > left = c(13.5401, 37.4235, 11.906 , 13.998 ,  NA    ,  0.4023,  NA    ,
    +          10.9044, 21.0629,  9.6985,  NA    , 12.9016, 39.164 , 34.6396,
    +          NA    , 20.3665, 16.5889, 18.0952, 45.3818, 35.3306,  8.4949,
    +          3.4041,  NA    ,  7.2828, 37.1265,  6.5969, 17.6868, 17.4977,
    +          16.3391, 36.0541)
    > right = c(13.5401, 37.4235, 11.906 , 13.998 ,  0.    ,  0.4023,  0.    ,
    +           10.9044, 21.0629,  9.6985,  0.    , 12.9016, 39.164 , 34.6396,
    +           0.    , 20.3665, 16.5889, 18.0952, 45.3818, 35.3306,  8.4949,
    +           3.4041,  0.    ,  7.2828, 37.1265,  6.5969, 17.6868, 17.4977,
    +           16.3391, 36.0541)
    > data = data.frame(left=left, right=right)
    > result = fitdistcens(data, 'logis', control=list(reltol=1e-14))
    > result
    Fitting of the distribution ' logis ' on censored data by maximum
      likelihood
    Parameters:
              estimate
    location 14.633459
    scale     9.232736
    > result$sd
    location    scale
    2.931505 1.546879
    )g#+@g|?5B@gZd;'@g"+@        g:H?rv   g;M%@g65@gʡe#@rv   g%䃞)@gEC@gBiQA@rv   g]4@gI&0@gF_2@gpΈްF@g_QA@gec @gAǘ;@rv   g6<R!@gx&1B@g9c@gү1@g8gDi1@gAV0@gW[B@r   )censoredr<   gTD-@gƠ>r/   g&4I,)w"@r.   N)rd   re   r   left_censoredr   r1   r   r   )r^   r2   r5   r6   r   r   r   test_logistic  s
   ry   c                  C   s|   g d} t | dgd  dgt|  dgd  }tj|dd\}}}|dks)J t|}t|ddd	 t|d
dd	 dS )a#  
    Ref: https://math.montana.edu/jobo/st528/documents/relc.pdf

    The data is the locomotive control time to failure example that starts
    on page 8.  That's the 8th page in the PDF; the page number shown in
    the text is 270).
    The document includes SAS output for the data.
    )%g     6@g     B@g      G@g     @H@g     I@g     J@g     @K@g     L@g     P@g      Q@g     `Q@g      S@g     @S@g     S@g      T@g     `T@g     T@g     T@g      U@g     V@g     `W@g     Y@g     Z@g      [@g      \@g     `\@g      ]@g     @]@g     ]@g     ]@g      ^@g     ^@g     ^@g     _@g     ``@g     `@g     `@   ;   r   r*   r,   g3w@Mb@?r/   g~jt?g{Gzt?N)r   rQ   rR   r   r1   rd   logr   )miles_to_failr2   sigmar5   r6   rk   r   r   r   test_lognorm  s   
r   c                  C   s   t g dg d} tjdd tj| ddtd\}}}}W d   n1 s(w   Y  t|d	d
d t|dd
d |dksAJ |dksGJ dS )a
  
    Test fitting the noncentral t distribution to censored data.

    Calculation in R:

    > library(fitdistrplus)
    > data <- data.frame(left=c(1, 2, 3, 5, 8, 10, 25, 25),
    +                    right=c(1, 2, 3, 5, 8, 10, NA, NA))
    > result = fitdistcens(data, 't', control=list(reltol=1e-14),
    +                      start=list(df=1, ncp=2))
    > result
    Fitting of the distribution ' t ' on censored data by maximum likelihood
    Parameters:
         estimate
    df  0.5432336
    ncp 2.8893565

    )r*   ra   rH      rA   r8      r   )r   r   r   r   r   r   r*   r*   ignoreoverr   r*   r+   NgBn+b?r.   r/   gTf@)r   rQ   rd   errstater   r1   r   r   )r2   rE   ncr5   r6   r   r   r   test_nct  s   

r   c                  C   s   t g dddgddggd} tjdd tj| dd	td
\}}}}W d   n1 s,w   Y  t|ddd t|ddd |dksEJ |d	ksKJ dS )a  
    Test fitting the shape parameters (df, ncp) of ncx2 to mixed data.

    Calculation in R, with
    * 5 not censored values [2.7, 0.2, 6.5, 0.4, 0.1],
    * 1 interval-censored value [[0.6, 1.0]], and
    * 2 right-censored values [8, 8].

    > library(fitdistrplus)
    > data <- data.frame(left=c(2.7, 0.2, 6.5, 0.4, 0.1, 0.6, 8, 8),
    +                    right=c(2.7, 0.2, 6.5, 0.4, 0.1, 1.0, NA, NA))
    > result = fitdistcens(data, 'chisq', control=list(reltol=1e-14),
    +                      start=list(df=1, ncp=2))
    > result
    Fitting of the distribution ' chisq ' on censored data by maximum
    likelihood
    Parameters:
        estimate
    df  1.052871
    ncp 2.362934
    )g@r!   g      @g?r    rA   g333333?g      ?rm   r   r   r   r*   r+   NgQB?r.   r/   g
I@)r   rd   r   r   r1   r   r   )r2   rE   ncpr5   r6   r   r   r   	test_ncx2  s   
r   c                  C   sT   t ddgddgddgddggd	} tj| td
\}}t|ddd t|ddd dS )a  
    Test fitting the normal distribution to interval-censored data.

    Calculation in R:

    > library(fitdistrplus)
    > data <- data.frame(left=c(0.10, 0.50, 0.75, 0.80),
    +                    right=c(0.20, 0.55, 0.90, 0.95))
    > result = fitdistcens(data, 'norm', control=list(reltol=1e-14))

    > result
    Fitting of the distribution ' norm ' on censored data by maximum likelihood
    Parameters:
          estimate
    mean 0.5919990
    sd   0.2868042
    > result$sd
         mean        sd
    0.1444432 0.1029451
    r    r!   r"   r#   r$   r%   r&   r'   r(   r<   gux?r.   r/   g  [?N)r   r   r1   r   r   r>   r   r   r   	test_norm>  s   r   c                  C   s   d} t dd dd | dD D  \}}t||}tj|dd\}}}t|dd	d
 |dks2J t|dd	d
 tt	| |}t
j|dd\}}	}
t|dd	d
 |	dksZJ t|
dd	d
 d S )Nz>3,5,6*,8,10*,11*,15,20*,22,23,27*,29,32,35,40,26,28,33*,21,24*c                 S   s$   g | ]}t |d  t|dkfqS )r   ra   )floatrR   ).0tr   r   r   
<listcomp>d  s    z*test_weibull_censored1.<locals>.<listcomp>c                 S   s   g | ]}| d qS )*)splitr   wr   r   r   r   e  s    ,r   r|   gx&1@gMbP?r/   g=
ףp<@)zipr   r   rQ   r   r1   r   rx   rd   re   r   )stimesrX   r2   rn   r5   r6   rf   c2rg   rh   r   r   r   test_weibull_censored1^  s   r   c                  C   s   d} t dd |  D ddj\}}|d }t||}tj|dt	d\}}}t
|d	d
d t
|ddd |dks>J d S )Na  
           450 0    460 1   1150 0   1150 0   1560 1
          1600 0   1660 1   1850 1   1850 1   1850 1
          1850 1   1850 1   2030 1   2030 1   2030 1
          2070 0   2070 0   2080 0   2200 1   3000 1
          3000 1   3000 1   3000 1   3100 0   3200 1
          3450 0   3750 1   3750 1   4150 1   4150 1
          4150 1   4150 1   4300 1   4300 1   4300 1
          4300 1   4600 0   4850 1   4850 1   4850 1
          4850 1   5000 1   5000 1   5000 1   6100 1
          6100 0   6100 1   6100 1   6300 1   6450 1
          6450 1   6700 1   7450 1   7800 1   7800 1
          8100 1   8100 1   8200 1   8500 1   8500 1
          8500 1   8750 1   8750 0   8750 1   9400 1
          9900 1  10100 1  10100 1  10100 1  11500 1
    c                 S   s   g | ]}t |qS r   )intr   r   r   r   r     s    z)test_weibull_min_sas1.<locals>.<listcomp>rc   ra   g     @@r   rP   ga4?g-C6?r/   gsK:@gh㈵>)rd   re   r   reshapeTr   rQ   r   r1   r   r   )textliferX   r2   rn   r5   r6   r   r   r   test_weibull_min_sas1{  s   &r   c                  C   sz   t g d} t| dgt| d  dgd  }tj|dddtd\}}}t|ddd	 t|d
dd	 t|ddd	 d S )N)         r                                    i	  i0  r      r   ra   r*   d   rt   gTR'@r}   r/   gR^@gHz[@)	rd   re   r   rQ   rR   r   r1   r   r   )daysr2   rn   r5   r6   r   r   r   test_weibull_min_sas2  s   $r   )r   r   ),numpyrd   numpy.testingr   scipy.optimizer   scipy.statsr   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r7   r?   rD   rF   r\   r_   ri   rl   ro   ru   ry   r   r   r   r   r   r   r   r   r   r   r   <module>   s.   P
#>"(Q$4)! !