@@ -253,7 +253,7 @@ def _calendar_month_middles(year):
253253 """list of middle day of each month, used by Linke turbidity lookup"""
254254 # remove mdays[0] since January starts at mdays[1]
255255 # make local copy of mdays since we need to change February for leap years
256- mdays = np .array (calendar .mdays [1 :])
256+ mdays = np .array (calendar .mdays [1 :])
257257 ydays = 365
258258 # handle leap years
259259 if calendar .isleap (year ):
@@ -533,3 +533,190 @@ def _calc_d(w, aod700, p):
533533 d = - 0.337 * aod700 ** 2 + 0.63 * aod700 + 0.116 + dp * np .log (p / p0 )
534534
535535 return d
536+
537+
538+ def detect_clearsky (measured , clearsky , times , window_length ,
539+ mean_diff = 75 , max_diff = 75 ,
540+ lower_line_length = - 5 , upper_line_length = 10 ,
541+ var_diff = 0.005 , slope_dev = 8 , max_iterations = 20 ,
542+ return_components = False ):
543+ """
544+ Detects clear sky times according to the algorithm developed by Reno
545+ and Hansen for GHI measurements [1]. The algorithm was designed and
546+ validated for analyzing GHI time series only. Users may attempt to
547+ apply it to other types of time series data using different filter
548+ settings, but should be skeptical of the results.
549+
550+ The algorithm detects clear sky times by comparing statistics for a
551+ measured time series and an expected clearsky time series.
552+ Statistics are calculated using a sliding time window (e.g., 10
553+ minutes). An iterative algorithm identifies clear periods, uses the
554+ identified periods to estimate bias in the clearsky data, scales the
555+ clearsky data and repeats.
556+
557+ Clear times are identified by meeting 5 criteria. Default values for
558+ these thresholds are appropriate for 10 minute windows of 1 minute
559+ GHI data.
560+
561+ Parameters
562+ ----------
563+ measured : array or Series
564+ Time series of measured values.
565+ clearsky : array or Series
566+ Time series of the expected clearsky values.
567+ times : DatetimeIndex
568+ Times of measured and clearsky values.
569+ window_length : int
570+ Length of sliding time window in minutes. Must be greater than 2
571+ periods.
572+ mean_diff : float
573+ Threshold value for agreement between mean values of measured
574+ and clearsky in each interval, see Eq. 6 in [1].
575+ max_diff : float
576+ Threshold value for agreement between maxima of measured and
577+ clearsky values in each interval, see Eq. 7 in [1].
578+ lower_line_length : float
579+ Lower limit of line length criterion from Eq. 8 in [1].
580+ Criterion satisfied when
581+ lower_line_length < line length difference < upper_line_length
582+ upper_line_length : float
583+ Upper limit of line length criterion from Eq. 8 in [1].
584+ var_diff : float
585+ Threshold value in Hz for the agreement between normalized
586+ standard deviations of rate of change in irradiance, see Eqs. 9
587+ through 11 in [1].
588+ slope_dev : float
589+ Threshold value for agreement between the largest magnitude of
590+ change in successive values, see Eqs. 12 through 14 in [1].
591+ max_iterations : int
592+ Maximum number of times to apply a different scaling factor to
593+ the clearsky and redetermine clear_samples. Must be 1 or larger.
594+ return_components : bool
595+ Controls if additional output should be returned. See below.
596+
597+ Returns
598+ -------
599+ clear_samples : array or Series
600+ Boolean array or Series of whether or not the given time is
601+ clear. Return type is the same as the input type.
602+
603+ components : OrderedDict, optional
604+ Dict of arrays of whether or not the given time window is clear
605+ for each condition. Only provided if return_components is True.
606+
607+ alpha : scalar, optional
608+ Scaling factor applied to the clearsky_ghi to obtain the
609+ detected clear_samples. Only provided if return_components is
610+ True.
611+
612+ References
613+ ----------
614+ [1] Reno, M.J. and C.W. Hansen, "Identification of periods of clear
615+ sky irradiance in time series of GHI measurements" Renewable Energy,
616+ v90, p. 520-531, 2016.
617+
618+ Notes
619+ -----
620+ Initial implementation in MATLAB by Matthew Reno. Modifications for
621+ computational efficiency by Joshua Patrick and Curtis Martin. Ported
622+ to Python by Will Holmgren, Tony Lorenzo, and Cliff Hansen.
623+
624+ Differences from MATLAB version:
625+
626+ * no support for unequal times
627+ * automatically determines sample_interval
628+ * requires a reference clear sky series instead calculating one
629+ from a user supplied location and UTCoffset
630+ * parameters are controllable via keyword arguments
631+ * option to return individual test components and clearsky scaling
632+ parameter
633+ """
634+
635+ # calculate deltas in units of minutes (matches input window_length units)
636+ deltas = np .diff (times ) / np .timedelta64 (1 , '60s' )
637+
638+ # determine the unique deltas and if we can proceed
639+ unique_deltas = np .unique (deltas )
640+ if len (unique_deltas ) == 1 :
641+ sample_interval = unique_deltas [0 ]
642+ else :
643+ raise NotImplementedError ('algorithm does not yet support unequal ' \
644+ 'times. consider resampling your data.' )
645+
646+ samples_per_window = int (window_length / sample_interval )
647+
648+ # generate matrix of integers for creating windows with indexing
649+ from scipy .linalg import hankel
650+ H = hankel (np .arange (samples_per_window ),
651+ np .arange (samples_per_window - 1 , len (times )))
652+
653+ # calculate measurement statistics
654+ meas_mean = np .mean (measured [H ], axis = 0 )
655+ meas_max = np .max (measured [H ], axis = 0 )
656+ meas_slope = np .diff (measured [H ], n = 1 , axis = 0 )
657+ # matlab std function normalizes by N-1, so set ddof=1 here
658+ meas_slope_nstd = np .std (meas_slope , axis = 0 , ddof = 1 ) / meas_mean
659+ meas_slope_max = np .max (np .abs (meas_slope ), axis = 0 )
660+ meas_line_length = np .sum (np .sqrt (
661+ meas_slope * meas_slope + sample_interval * sample_interval ), axis = 0 )
662+
663+ # calculate clear sky statistics
664+ clear_mean = np .mean (clearsky [H ], axis = 0 )
665+ clear_max = np .max (clearsky [H ], axis = 0 )
666+ clear_slope = np .diff (clearsky [H ], n = 1 , axis = 0 )
667+ clear_slope_max = np .max (np .abs (clear_slope ), axis = 0 )
668+
669+ from scipy .optimize import minimize_scalar
670+
671+ alpha = 1
672+ for iteration in range (max_iterations ):
673+ clear_line_length = np .sum (np .sqrt (
674+ alpha * alpha * clear_slope * clear_slope +
675+ sample_interval * sample_interval ), axis = 0 )
676+
677+ line_diff = meas_line_length - clear_line_length
678+
679+ # evaluate comparison criteria
680+ c1 = np .abs (meas_mean - alpha * clear_mean ) < mean_diff
681+ c2 = np .abs (meas_max - alpha * clear_max ) < max_diff
682+ c3 = (line_diff > lower_line_length ) & (line_diff < upper_line_length )
683+ c4 = meas_slope_nstd < var_diff
684+ c5 = (meas_slope_max - alpha * clear_slope_max ) < slope_dev
685+ c6 = (clear_mean != 0 ) & ~ np .isnan (clear_mean )
686+ clear_windows = c1 & c2 & c3 & c4 & c5 & c6
687+
688+ # create array to return
689+ clear_samples = np .full_like (measured , False , dtype = 'bool' )
690+ # find the samples contained in any window classified as clear
691+ clear_samples [np .unique (H [:, clear_windows ])] = True
692+
693+ # find a new alpha
694+ previous_alpha = alpha
695+ clear_meas = measured [clear_samples ]
696+ clear_clear = clearsky [clear_samples ]
697+ def rmse (alpha ):
698+ return np .sqrt (np .mean ((clear_meas - alpha * clear_clear )** 2 ))
699+ alpha = minimize_scalar (rmse ).x
700+ if round (alpha * 10000 ) == round (previous_alpha * 10000 ):
701+ break
702+ else :
703+ import warnings
704+ warnings .warn ('failed to converge after %s iterations' \
705+ % max_iterations , RuntimeWarning )
706+
707+ # be polite about returning the same type as was input
708+ if isinstance (measured , pd .Series ):
709+ clear_samples = pd .Series (clear_samples , index = times )
710+
711+ if return_components :
712+ components = OrderedDict ()
713+ components ['mean_diff' ] = c1
714+ components ['max_diff' ] = c2
715+ components ['line_length' ] = c3
716+ components ['slope_nstd' ] = c4
717+ components ['slope_max' ] = c5
718+ components ['mean_nan' ] = c6
719+ components ['windows' ] = clear_windows
720+ return clear_samples , components , alpha
721+ else :
722+ return clear_samples
0 commit comments