@@ -226,6 +226,166 @@ returns a vector of quantiles, respectively at `[0.0, 0.2, 0.4, 0.6, 0.8, 1.0]`.
226226"""
function nquantile(x, n::Integer)
    # n + 1 evenly spaced probabilities running from 0 to 1
    probs = (0:n) / n
    return quantile(x, probs)
end
228228
"""
    quantilerank(itr, value; method=:inc)

Compute the quantile position in the [0, 1] interval of `value` relative to collection `itr`.

Different definitions can be chosen via the `method` keyword argument.
Let `count_less` be the number of elements of `itr` that are less than `value`,
`count_equal` the number of elements of `itr` that are equal to `value`, `n` the length of `itr`,
`greatest_smaller` the highest value below `value` and `smallest_greater` the lowest value above `value`.
Then `method` supports the following definitions:

- `:inc` (default): Return a value in the range 0 to 1 inclusive.
  Return `count_less / (n - 1)` if `value ∈ itr`, otherwise apply interpolation based on
  definition 7 of quantile in Hyndman and Fan (1996)
  (equivalent to Excel `PERCENTRANK` and `PERCENTRANK.INC`).
  This definition corresponds to the lower semi-continuous inverse of
  [`quantile`](@ref) with its default parameters.

- `:exc`: Return a value in the range 0 to 1 exclusive.
  Return `(count_less + 1) / (n + 1)` if `value ∈ itr` otherwise apply interpolation
  based on definition 6 of quantile in Hyndman and Fan (1996)
  (equivalent to Excel `PERCENTRANK.EXC`).

- `:compete`: Return `count_less / (n - 1)` if `value ∈ itr`, otherwise
  return `(count_less - 1) / (n - 1)`, without interpolation
  (equivalent to MariaDB `PERCENT_RANK`, dplyr `percent_rank`).
  The result is `0.0` when no element is less than `value` and `1.0` when every
  element is less than `value`.

- `:tied`: Return `(count_less + count_equal/2) / n`, without interpolation.
  Based on the definition in Roscoe, J. T. (1975)
  (equivalent to `"mean"` kind of SciPy `percentileofscore`).

- `:strict`: Return `count_less / n`, without interpolation
  (equivalent to `"strict"` kind of SciPy `percentileofscore`).

- `:weak`: Return `(count_less + count_equal) / n`, without interpolation
  (equivalent to `"weak"` kind of SciPy `percentileofscore`).

!!! note
    An `ArgumentError` is thrown if `value` is `NaN` or `missing`, if `itr` contains
    `NaN` or `missing` values, if `itr` contains fewer than two elements,
    or if `method` is not one of the symbols listed above.

`itr` is traversed exactly once, so single-pass iterators are supported.

# References
Roscoe, J. T. (1975). "[Fundamental Research Statistics for the Behavioral Sciences](http://www.bryanburnham.net/wp-content/uploads/2014/07/Fundamental-Statistics-for-the-Behavioral-Sciences-v2.0.pdf#page=57)",
2nd ed., New York : Holt, Rinehart and Winston.

Hyndman, R.J and Fan, Y. (1996) "[Sample Quantiles in Statistical Packages](https://www.amherst.edu/media/view/129116/original/Sample+Quantiles.pdf)",
*The American Statistician*, Vol. 50, No. 4, pp. 361-365.

# Examples
```julia
julia> using StatsBase

julia> v1 = [1, 1, 1, 2, 3, 4, 8, 11, 12, 13];

julia> v2 = [1, 2, 3, 5, 6, missing, 8];

julia> v3 = [1, 2, 3, 4, 4, 5, 6, 7, 8, 9];

julia> quantilerank(v1, 2)
0.3333333333333333

julia> quantilerank(v1, 2, method=:exc), quantilerank(v1, 2, method=:tied)
(0.36363636363636365, 0.35)

# use `skipmissing` for vectors with missing entries.
julia> quantilerank(skipmissing(v2), 4)
0.5

# use broadcasting with `Ref` to compute quantile rank for multiple values
julia> quantilerank.(Ref(v3), [4, 8])
2-element Vector{Float64}:
 0.3333333333333333
 0.8888888888888888
```
"""
function quantilerank(itr, value; method::Symbol=:inc)
    ((value isa Number && isnan(value)) || ismissing(value)) &&
        throw(ArgumentError(" `value` cannot be NaN or missing"))

    count_less = count_equal = n = 0
    # `value` doubles as the "not found yet" sentinel for both neighbours: an element
    # only becomes a neighbour after comparing strictly below or not-equal-above `value`.
    greatest_smaller = smallest_greater = value

    # Validate and classify every element in a single traversal, so that one-shot
    # iterators (e.g. `Iterators.Stateful`) are not exhausted before being counted.
    for x in itr
        (ismissing(x) || (x isa Number && isnan(x))) &&
            throw(ArgumentError(" `itr` cannot contain missing or NaN entries"))
        if x == value
            count_equal += 1
        elseif x < value
            count_less += 1
            if greatest_smaller == value || greatest_smaller < x
                greatest_smaller = x
            end
        else
            if smallest_greater == value || smallest_greater > x
                smallest_greater = x
            end
        end
        n += 1
    end

    n == 0 &&
        throw(ArgumentError(" `itr` is empty. Pass a collection with at least two elements"))
    n == 1 &&
        throw(ArgumentError(" `itr` has only 1 value. Pass a collection with at least two elements"))

    if method == :inc
        if greatest_smaller == value
            # nothing lies below `value`
            return 0.0
        elseif count_equal > 0
            return count_less / (n - 1)
        elseif smallest_greater == value
            # nothing lies above `value`
            return 1.0
        else
            # linear interpolation between the ranks of the two neighbours
            lower = (count_less - 1) / (n - 1)
            upper = count_less / (n - 1)
            ratio = (value - greatest_smaller) / (smallest_greater - greatest_smaller)
            return lower + ratio * (upper - lower)
        end
    elseif method == :exc
        if count_less == 0 && count_equal == 0
            return 0.0
        elseif count_less == 0
            return 1.0 / (n + 1)
        elseif count_equal > 0
            return (count_less + 1) / (n + 1)
        elseif smallest_greater == value
            return 1.0
        else
            lower = count_less / (n + 1)
            upper = (count_less + 1) / (n + 1)
            ratio = (value - greatest_smaller) / (smallest_greater - greatest_smaller)
            return lower + ratio * (upper - lower)
        end
    elseif method == :compete
        # The counters already answer the questions `value > maximum(itr)`,
        # `value ≤ minimum(itr)` and `value ∈ itr`, so `itr` is not traversed again.
        if count_less == n
            return 1.0
        elseif count_less == 0
            return 0.0
        elseif count_equal > 0
            return count_less / (n - 1)
        else
            return (count_less - 1) / (n - 1)
        end
    elseif method == :tied
        return (count_less + count_equal / 2) / n
    elseif method == :strict
        return count_less / n
    elseif method == :weak
        return (count_less + count_equal) / n
    else
        throw(ArgumentError(" method=:$method is not valid. Pass :inc, :exc, :compete, :tied, :strict or :weak."))
    end
end
380+
"""
    percentilerank(itr, value; method=:inc)

Return the percentile rank of `value` relative to collection `itr`, i.e. the result of
[`quantilerank`](@ref) for the same arguments and `method`, multiplied by 100.

See the [`quantilerank`](@ref) docstring for more details.
"""
function percentilerank(itr, value; method::Symbol=:inc)
    rank = quantilerank(itr, value; method=method)
    return rank * 100
end
229389
230390# ############################
231391#
0 commit comments