Changeset 4588 for trunk/library

Show
Ignore:
Timestamp:
11/11/08 04:02:02 (2 months ago)
Author:
pmoura
Message:

Modified the "statistics" library to use the variance stable algorithm to calculate accurate values for the standard deviation for both samples and populations. Thanks to Parker Jones for the bug report.

Location:
trunk/library
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • trunk/library/population.lgt

    r4483 r4588  
    44 
    55    :- info([ 
    6         version is 1.0, 
     6        version is 1.1, 
    77        author is 'Paulo Moura', 
    8         date is 2008/10/4, 
     8        date is 2008/11/11, 
    99        comment is 'Statistical population represented as a list of numbers.']). 
    1010 
    11     skewness([X| Xs], Skewness) :- 
    12         :arithmetic_mean(Xs, 1, Length, X, Mean), 
    13         X2 is (X - Mean) ** 2, 
    14         X3 is (X - Mean) ** 3, 
    15         :squares_and_cubes(Xs, Mean, X2, Squares, X3, Cubes), 
    16         Skew is (Cubes / Length) / (Squares / Length) ** 1.5, 
    17         Skewness is sqrt(Length*(Length - 1)) * Skew / (Length - 2). 
     11    skewness([X| Xs], Skewness) :- 
     12        :arithmetic_mean(Xs, 1, Length, X, Mean), 
     13        X2 is (X - Mean) ** 2, 
     14        X3 is (X - Mean) ** 3, 
     15        :squares_and_cubes(Xs, Mean, X2, Squares, X3, Cubes), 
     16        Skew is (Cubes / Length) / (Squares / Length) ** 1.5, 
     17        Skewness is sqrt(Length*(Length - 1)) * Skew / (Length - 2). 
    1818 
    19     variance([X| Xs], Variance) :- 
    20         :variance(Xs, 1, Length, X, 0, M2), 
    21         Variance is M2 / Length. 
     19    standard_deviation([X| Xs], Deviation) :- 
     20        :variance(Xs, 1, Length, X, 0, M2), 
     21        Deviation is sqrt(M2 / Length). 
     22 
     23    variance([X| Xs], Variance) :- 
     24        :variance(Xs, 1, Length, X, 0, M2), 
     25        Variance is M2 / Length. 
    2226 
    2327:- end_object. 
  • trunk/library/sample.lgt

    r4483 r4588  
    44 
    55    :- info([ 
    6         version is 1.0, 
     6        version is 1.1, 
    77        author is 'Paulo Moura', 
    8         date is 2008/10/4, 
     8        date is 2008/11/11, 
    99        comment is 'Statistical sample represented as a list of numbers.']). 
    1010 
    11     skewness([X| Xs], Skewness) :- 
    12         :arithmetic_mean(Xs, 1, Length, X, Mean), 
    13         X2 is (X - Mean) ** 2, 
    14         X3 is (X - Mean) ** 3, 
    15         :squares_and_cubes(Xs, Mean, X2, Squares, X3, Cubes), 
    16         Skewness is (Cubes / Length) / (Squares / Length) ** 1.5. 
     11    skewness([X| Xs], Skewness) :- 
     12        :arithmetic_mean(Xs, 1, Length, X, Mean), 
     13        X2 is (X - Mean) ** 2, 
     14        X3 is (X - Mean) ** 3, 
     15        :squares_and_cubes(Xs, Mean, X2, Squares, X3, Cubes), 
     16        Skewness is (Cubes / Length) / (Squares / Length) ** 1.5. 
    1717 
    18     variance([X| Xs], Variance) :- 
    19         :variance(Xs, 1, Length, X, 0, M2), 
    20         Variance is M2 / (Length - 1). 
     18    standard_deviation([X| Xs], Deviation) :- 
     19        :variance(Xs, 1, Length, X, 0, M2), 
     20        Deviation is sqrt(M2 / (Length - 1)). 
     21 
     22    variance([X| Xs], Variance) :- 
     23        :variance(Xs, 1, Length, X, 0, M2), 
     24        Variance is M2 / (Length - 1). 
    2125 
    2226:- end_object. 
  • trunk/library/statistics.lgt

    r4483 r4588  
    44 
    55    :- info([ 
    6         version is 1.0, 
     6        version is 1.1, 
    77        author is 'Paulo Moura', 
    8         date is 2008/10/4, 
     8        date is 2008/11/11, 
    99        comment is 'Statistical calculations over a list of numbers.']). 
    1010 
     
    4040        harmonic_mean(Xs, Lacc2, Sacc2, Mean). 
    4141 
    42     mean_deviation([X| Xs], Deviation) :- 
    43         arithmetic_mean(Xs, 1, Length, X, Mean), 
    44         average_deviation([X| Xs], Mean, 1, Length, 0, Sum), 
    45         Deviation is Sum / Length. 
     42    mean_deviation([X| Xs], Deviation) :- 
     43        arithmetic_mean(Xs, 1, Length, X, Mean), 
     44        average_deviation([X| Xs], Mean, 1, Length, 0, Sum), 
     45        Deviation is Sum / Length. 
    4646 
    47     median_deviation([X| Xs], Deviation) :- 
    48         median([X| Xs], Median, Length), 
    49         average_deviation([X| Xs], Median, 1, Length, 0, Sum), 
    50         Deviation is Sum / Length. 
     47    median_deviation([X| Xs], Deviation) :- 
     48        median([X| Xs], Median, Length), 
     49        average_deviation([X| Xs], Median, 1, Length, 0, Sum), 
     50        Deviation is Sum / Length. 
    5151 
    52     average_deviation([X| Xs], CentralTendency, Deviation) :- 
    53         average_deviation([X| Xs], CentralTendency, 1, Length, 0, Sum), 
    54         Deviation is Sum / Length. 
     52    average_deviation([X| Xs], CentralTendency, Deviation) :- 
     53        average_deviation([X| Xs], CentralTendency, 1, Length, 0, Sum), 
     54        Deviation is Sum / Length. 
    5555 
    56     average_deviation([], _, Length, Length, Sum, Sum). 
    57     average_deviation([X| Xs], CentralTendency, Lacc, Length, Sacc, Sum) :- 
     56    average_deviation([], _, Length, Length, Sum, Sum). 
     57    average_deviation([X| Xs], CentralTendency, Lacc, Length, Sacc, Sum) :- 
    5858        Lacc2 is Lacc + 1, 
    5959        Sacc2 is Sacc + abs(X - CentralTendency), 
    60         average_deviation(Xs, CentralTendency, Lacc2, Length, Sacc2, Sum). 
     60        average_deviation(Xs, CentralTendency, Lacc2, Length, Sacc2, Sum). 
    6161 
    62     standard_deviation([X| Xs], Deviation) :- 
    63         X2 is X ** 2, 
    64         standard_deviation(Xs, 1, Length, X2, Squares, X, Mean), 
    65         Deviation is sqrt(Squares / Length - Mean ** 2). 
     62    coefficient_of_variation([X| Xs], Coefficient) :- 
     63        ::standard_deviation([X| Xs], Deviation), 
     64        arithmetic_mean([X| Xs], Mean), 
     65        Coefficient is Deviation / Mean. 
    6666 
    67     standard_deviation([], Length, Length, Squares, Squares, Sum, Mean) :- 
    68         Mean is Sum / Length. 
    69     standard_deviation([X| Xs], Lacc, Length, Sacc, Squares, Macc, Mean) :- 
    70         Lacc2 is Lacc + 1, 
    71         Sacc2 is Sacc + X ** 2, 
    72         Macc2 is Macc + X, 
    73         standard_deviation(Xs, Lacc2, Length, Sacc2, Squares, Macc2, Mean). 
     67    relative_standard_deviation(Xs, Percentage) :- 
     68        coefficient_of_variation(Xs, Coefficient), 
     69        Percentage is Coefficient * 100. 
    7470 
    75     coefficient_of_variation([X| Xs], Coefficient) :- 
    76         standard_deviation([X| Xs], Deviation), 
    77         arithmetic_mean([X| Xs], Mean), 
    78         Coefficient is Deviation / Mean. 
    79  
    80     relative_standard_deviation(Xs, Percentage) :- 
    81         coefficient_of_variation(Xs, Coefficient), 
    82         Percentage is Coefficient * 100. 
    83  
    84     variance([X| Xs], Variance) :- 
    85         variance(Xs, 1, Length, X, 0, M2), 
    86         Variance is M2 / (Length - 1). 
    87  
    88     variance([], Length, Length, _, M2, M2). 
    89     variance([X| Xs], Lacc, Length, Mean, M2acc, M2) :- 
     71    variance([], Length, Length, _, M2, M2). 
     72    variance([X| Xs], Lacc, Length, Mean, M2acc, M2) :- 
    9073        Lacc2 is Lacc + 1, 
    9174        Delta is X - Mean, 
    92         Mean2 is Mean + Delta/Lacc2, 
    93         M2acc2 is M2acc + Delta*(X - Mean2), 
    94         variance(Xs, Lacc2, Length, Mean2, M2acc2, M2). 
     75        Mean2 is Mean + Delta/Lacc2, 
     76        M2acc2 is M2acc + Delta*(X - Mean2), 
     77        variance(Xs, Lacc2, Length, Mean2, M2acc2, M2). 
    9578 
    96     squares_and_cubes([], _, Squares, Squares, Cubes, Cubes). 
    97     squares_and_cubes([X| Xs], Mean, Sacc, Squares, Cacc, Cubes) :- 
    98         Sacc2 is Sacc + (X - Mean) ** 2, 
    99         Cacc2 is Cacc + (X - Mean) ** 3, 
    100         squares_and_cubes(Xs, Mean, Sacc2, Squares, Cacc2, Cubes). 
     79    squares_and_cubes([], _, Squares, Squares, Cubes, Cubes). 
     80    squares_and_cubes([X| Xs], Mean, Sacc, Squares, Cacc, Cubes) :- 
     81        Sacc2 is Sacc + (X - Mean) ** 2, 
     82        Cacc2 is Cacc + (X - Mean) ** 3, 
     83        squares_and_cubes(Xs, Mean, Sacc2, Squares, Cacc2, Cubes). 
    10184 
    102     median([X| Xs], Median) :- 
    103         median([X| Xs], Median, _). 
     85    median([X| Xs], Median) :- 
     86        median([X| Xs], Median, _). 
    10487 
    105     median([X| Xs], Median, Length) :- 
    106         quicksort([X| Xs], [], Sorted, 0, Length), 
    107         (   Length mod 2 =:= 1 -> 
    108             Middle is Length // 2 + 1, 
    109             middle_element(1, Middle, Sorted, Median) 
    110         ;   Left is Length // 2, 
    111             middle_elements(1, Left, Sorted, XLeft, XRight), 
    112             Median is XLeft + (XRight - XLeft) / 2 
    113         ). 
     88    median([X| Xs], Median, Length) :- 
     89        quicksort([X| Xs], [], Sorted, 0, Length), 
     90        (   Length mod 2 =:= 1 -> 
     91            Middle is Length // 2 + 1, 
     92            middle_element(1, Middle, Sorted, Median) 
     93        ;   Left is Length // 2, 
     94            middle_elements(1, Left, Sorted, XLeft, XRight), 
     95            Median is XLeft + (XRight - XLeft) / 2 
     96        ). 
    11497 
    11598    quicksort([], Sorted, Sorted, Length, Length). 
     
    128111        partition(Xs, Pivot, Small1, Large1). 
    129112 
    130     middle_element(N, N, [X| _], X) :- 
    131         !. 
    132     middle_element(M, N, [_| Xs], X) :- 
    133         M2 is M + 1, 
    134         middle_element(M2, N, Xs, X). 
     113    middle_element(N, N, [X| _], X) :- 
     114        !. 
     115    middle_element(M, N, [_| Xs], X) :- 
     116        M2 is M + 1, 
     117        middle_element(M2, N, Xs, X). 
    135118 
    136     middle_elements(N, N, [X1, X2| _], X1, X2) :- 
    137         !. 
    138     middle_elements(M, N, [_| Xs], X1, X2) :- 
    139         M2 is M + 1, 
    140         middle_elements(M2, N, Xs, X1, X2). 
     119    middle_elements(N, N, [X1, X2| _], X1, X2) :- 
     120        !. 
     121    middle_elements(M, N, [_| Xs], X1, X2) :- 
     122        M2 is M + 1, 
     123        middle_elements(M2, N, Xs, X1, X2). 
    141124 
    142125    min([X| Xs], Min) :- 
     
    160143        ). 
    161144 
    162     range([X| Xs], Range) :- 
     145    range([X| Xs], Range) :- 
    163146        range(Xs, X, Min, X, Max), 
    164147        Range is Max - Min. 
    165148 
    166     range([], Min, Min, Max, Max). 
    167     range([X| Xs], CurrentMin, Min, CurrentMax, Max) :- 
    168         (   X < Min -> 
    169             range(Xs, X, Min, CurrentMax, Max) 
    170         ;   X > Max -> 
    171             range(Xs, CurrentMin, Min, X, Max) 
    172         ;   range(Xs, CurrentMin, Min, CurrentMax, Max) 
    173         ). 
     149    range([], Min, Min, Max, Max). 
     150    range([X| Xs], CurrentMin, Min, CurrentMax, Max) :- 
     151        (   X < Min -> 
     152            range(Xs, X, Min, CurrentMax, Max) 
     153        ;   X > Max -> 
     154            range(Xs, CurrentMin, Min, X, Max) 
     155        ;   range(Xs, CurrentMin, Min, CurrentMax, Max) 
     156        ). 
    174157 
    175158    product([X| Xs], Product) :-