Source code for hs_process.batch

# -*- coding: utf-8 -*-
import geopandas as gpd
import multiprocessing as mp
import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
import sys
import time
from tqdm import tqdm

from matplotlib import pyplot as plt
import warnings

from hs_process.utilities import defaults
from hs_process.utilities import hsio
from hs_process.segment import segment
from hs_process.spec_mod import spec_mod
from hs_process.spatial_mod import spatial_mod

from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed


[docs]class batch(object):
    '''
    Class for batch processing hyperspectral image data. Makes use of
    `segment`_, `spatial_mod`_, and `spec_mod`_ to batch process many
    datacubes in a given directory. Supports options to save full
    datacubes, geotiff renders, as well as summary statistics and/or
    reports for the various tools.

    Note:
        It may be a good idea to review and understand the `defaults`_,
        `hsio`_, `hstools`_, `segment`_, `spatial_mod`_, and `spec_mod`_
        classes prior to using the ``batch`` module.

    .. _defaults: hs_process.defaults.html
    .. _hsio: hs_process.hsio.html
    .. _hstools: hs_process.hstools.html
    .. _segment: hs_process.segment.html
    .. _spatial_mod: hs_process.spatial_mod.html
    .. _spec_mod: hs_process.spec_mod.html
    '''
    def __init__(self, base_dir=None, search_ext='.bip', dir_level=0,
                 lock=None, progress_bar=False):
        '''
        Parameters:
            base_dir (``str``, optional): directory path to search for files to
                spectrally clip; if ``fname_list`` is not ``None``, ``base_dir`` will
                be ignored (default: ``None``).
            search_ext (``str``): file format/extension to search for in all
                directories and subdirectories to determine which files to
                process; if ``fname_list`` is not ``None``, ``search_ext`` will
                be ignored (default: 'bip').
            dir_level (``int``): The number of directory levels to search; if
                ``None``, searches all directory levels (default: 0).
            lock (``multiprocessing.Lock``): Can be passed to ensure lock is in
                place when writing to a file during multiprocessing.
        '''
        self.base_dir = base_dir
        self.search_ext = search_ext
        self.dir_level = dir_level
        self.lock = lock
        self.progress_bar = progress_bar

        self.fname_list = None
        if base_dir is not None:
            self.fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

        self.io = hsio()
        self.my_spectral_mod = None
        self.my_spatial_mod = None
        self.my_segment = None

    def _try_spat_crop_col_key(self, key, df_row):
        '''
        Gets value of ``key`` (column name) from ``df_row``; returns
        ``None`` if there is a KeyError

        This is tricky for crop_X and buf_X columns, because we must decipher
        whether to get these values from the default pool or not. If we get a
        KeyError, our first instinct is to gather the default, but we must
        check the "inverse" first (the "inverse" of crop_e_pix is crop_e_m) to
        avoid overwriting a value passed in df_row unintentionally. Therefore,
        this function handles keys differently if "crop" or "buf" are part of
        ``key`` than if they are not part of ``key``

        Adds ``key`` to batch.io.defaults.spat_crop_cols if it does not yet
        exist, but then of course the ``value`` that is returned will be
        ``None``
        '''
        if key not in self.io.defaults.spat_crop_cols.keys():
            print('Adding key "{0}" to defaults.spat_crop_cols dictionary'
                  ''.format(key))
            self.io.defaults.spat_crop_cols[key] = key

        try:
            value = df_row[self.io.defaults.spat_crop_cols[key]]
        except KeyError:  # try to retrieve a default value
            # decide whehter to get default or not.. how?
            # check the inverse to see if it is accesible
            # try:
            #     value = self.io.defaults.crop_defaults[key]
            # except KeyError:
            #     value = None
            if 'crop' in key or 'buf' in key:
                key_base = key[:key.find('_', key.rfind('_'))]
                key_unit = key[key.find('_', key.rfind('_')):]
                if key_unit == '_m':
                    key_unit_inv = '_pix'
                elif key_unit == '_pix':
                    key_unit_inv = '_m'
                try:
                    value_inv = df_row[self.io.defaults.spat_crop_cols[key_base+key_unit_inv]]  # exists; set to NaN and carry on
                    value = None
                except KeyError:  # neither exist, gather default
                    try:
                        value = self.io.defaults.crop_defaults[key]
                    except KeyError:
                        value = None
            else:  # proceed as normal
                try:
                    value = self.io.defaults.crop_defaults[key]
                except KeyError:
                    value = None

        # if key in ['crop_e_m', 'crop_n_m', 'crop_e_pix', 'crop_n_pix']:
        #     print('Key: {0}  Value: {1}'.format(key, value))
        return value

    def _check_processed(self, fname_list, base_dir_out, folder_name,
                         name_append, append_extra=None, ext=None):
        '''
        Checks if any files in fname_list have already (presumably) undergone
        processing. This is determined by checking if a file exists with a
        particular name based on the filename in fname_list and naming
        parameters (i.e,. ``folder_name`` and ``name_append``).

        Parameters:
            ext (``str``): e.g., '.spec'
        '''
        if append_extra is None:
            append_extra = ''
        fname_list_final = fname_list.copy()
        for fname in fname_list:
            if base_dir_out is None:
                base_dir = os.path.split(fname)[0]
                dir_out, name_append = self._save_file_setup(
                        base_dir, folder_name, name_append)
            else:
                dir_out, name_append = self._save_file_setup(
                        base_dir_out, folder_name, name_append)
            name_print = self._get_name_print(fname)

            if ext is None:
                name_label = (name_print + name_append + append_extra + '.' +
                              self.io.defaults.envi_write.interleave)
            else:
                name_label = (name_print + name_append + append_extra + ext)
            if os.path.isfile(os.path.join(dir_out, name_label)):
                fname_list_final.remove(fname)
        msg1 = ('There are no files to process. Please check if files have '
                'already undergone processing. If existing files should be '
                'overwritten, be sure to set the ``out_force`` parameter.\n')
        msg2 = ('Processing {0} files. If existing files should be '
                'overwritten, be sure to set the ``out_force`` parameter.\n'
                ''.format(len(fname_list_final)))
        if not len(fname_list_final) > 0:
            warnings.warn(msg1, UserWarning, stacklevel=0)
        # else:
        #     print(msg2)
        time.sleep(0.2)  # when using progress bar, this keeps from splitting lines
        return fname_list_final

    def _crop_read_sheet(self, row):
        '''
        Reads the necessary information from the spreadsheet and saves it
        to a dictionary

        If this function causes an error, try checking
        ``batch.io.defaults.spat_crop_col`` - these should be adjusted
        according to the default column names of the input (i.e.,
        ``fname_sheet``).
        '''
        crop_specs = {
                'directory': self._try_spat_crop_col_key('directory', row),
                'fname': self._try_spat_crop_col_key('fname', row),
                'name_short': self._try_spat_crop_col_key('name_short', row),
                'name_long': self._try_spat_crop_col_key('name_long', row),
                'ext': self._try_spat_crop_col_key('ext', row),
                'plot_id_ref': self._try_spat_crop_col_key('plot_id_ref', row),
                'pix_e_ul': self._try_spat_crop_col_key('pix_e_ul', row),
                'pix_n_ul': self._try_spat_crop_col_key('pix_n_ul', row),
                'alley_size_e_m': self._try_spat_crop_col_key('alley_size_e_m', row),
                'alley_size_n_m': self._try_spat_crop_col_key('alley_size_n_m', row),
                'alley_size_e_pix': self._try_spat_crop_col_key('alley_size_e_pix', row),
                'alley_size_n_pix': self._try_spat_crop_col_key('alley_size_n_pix', row),
                'buf_e_m': self._try_spat_crop_col_key('buf_e_m', row),
                'buf_n_m': self._try_spat_crop_col_key('buf_n_m', row),
                'buf_e_pix': self._try_spat_crop_col_key('buf_e_pix', row),
                'buf_n_pix': self._try_spat_crop_col_key('buf_n_pix', row),
                'crop_e_m': self._try_spat_crop_col_key('crop_e_m', row),
                'crop_n_m': self._try_spat_crop_col_key('crop_n_m', row),
                'crop_e_pix': self._try_spat_crop_col_key('crop_e_pix', row),
                'crop_n_pix': self._try_spat_crop_col_key('crop_n_pix', row),
                'gdf_shft_e_pix': self._try_spat_crop_col_key('gdf_shft_e_pix', row),
                'gdf_shft_n_pix': self._try_spat_crop_col_key('gdf_shft_n_pix', row),
                'gdf_shft_e_m': self._try_spat_crop_col_key('gdf_shft_e_m', row),
                'gdf_shft_n_m': self._try_spat_crop_col_key('gdf_shft_n_m', row),
                'n_plots_x': self._try_spat_crop_col_key('n_plots_x', row),
                'n_plots_y': self._try_spat_crop_col_key('n_plots_y', row),
                'n_plots': self._try_spat_crop_col_key('n_plots', row)}
        if crop_specs['fname'] is None:
            try:
                crop_specs['fname'] = (crop_specs['name_short'] +
                                       crop_specs['name_long'] +
                                       crop_specs['ext'])
            except TypeError:
                crop_specs['fname'] = None
        if crop_specs['fname'] is not None:
            base_name = os.path.basename(crop_specs['fname'])
            if crop_specs['name_short'] is None:
                crop_specs['name_short'] = base_name[
                        :base_name.find('-', base_name.rfind('_'))]
            if crop_specs['name_long'] is None:
                crop_specs['name_long'] = base_name[
                        base_name.find('-', base_name.rfind('_')):]
            if crop_specs['ext'] is None:
                crop_specs['ext'] = os.path.splitext(crop_specs['fname'])[1]

        for col_name in row.index:
            if col_name not in self.io.defaults.spat_crop_cols.keys():
                crop_specs[col_name] = row[col_name]
        if not pd.notnull(crop_specs['name_long']):
            crop_specs['name_long'] = None
        if not pd.notnull(crop_specs['plot_id_ref']):
            crop_specs['plot_id_ref'] = None
        if not pd.notnull(crop_specs['name_short']):
            crop_specs['name_short'] = None

        self.crop_specs = crop_specs
        return crop_specs

    def _pix_to_mapunit(self, crop_specs, spyfile=None):
        '''
        Looks over specifications of ``crop_specs``, and converts betweeen pixel
        units and map units if one is populated and the other is ``None``
        '''
        cs = crop_specs.copy()

        if spyfile is None:
            spyfile = self.io.spyfile
        spy_ps_e = float(spyfile.metadata['map info'][5])
        spy_ps_n = float(spyfile.metadata['map info'][6])
        # Crop size
#        if cs['crop_e_pix'] is None and cs['crop_e_m'] is not None:
        if pd.isnull(cs['crop_e_pix']) and pd.notnull(cs['crop_e_m']):
            cs['crop_e_pix'] = int(cs['crop_e_m'] / spy_ps_e)
        elif pd.notnull(cs['crop_e_pix']) and pd.isnull(cs['crop_e_m']):
            cs['crop_e_m'] = cs['crop_e_pix'] * spy_ps_e
        if pd.isnull(cs['crop_n_pix']) and pd.notnull(cs['crop_n_m']):
            cs['crop_n_pix'] = int(cs['crop_n_m'] / spy_ps_n)
        elif pd.notnull(cs['crop_n_pix']) and pd.isnull(cs['crop_n_m']):
            cs['crop_n_m'] = cs['crop_n_pix'] * spy_ps_n
        # Buffer
        if pd.isnull(cs['buf_e_pix']) and pd.notnull(cs['buf_e_m']):
            cs['buf_e_pix'] = int(cs['buf_e_m'] / spy_ps_e)
        elif pd.notnull(cs['buf_e_pix']) and pd.isnull(cs['buf_e_m']):
            cs['buf_e_m'] = cs['buf_e_pix'] * spy_ps_e
        if pd.isnull(cs['buf_n_pix']) and pd.notnull(cs['buf_n_m']):
            cs['buf_n_pix'] = int(cs['buf_n_m'] / spy_ps_e)
        elif pd.notnull(cs['buf_n_pix']) and pd.isnull(cs['buf_n_m']):
            cs['buf_n_m'] = cs['buf_n_pix'] * spy_ps_e
        # Shift
        if pd.isnull(cs['gdf_shft_e_pix']) and pd.notnull(cs['gdf_shft_e_m']):
            cs['gdf_shft_e_pix'] = int(cs['gdf_shft_e_m'] / spy_ps_e)
        elif pd.notnull(cs['gdf_shft_e_pix']) and pd.isnull(cs['gdf_shft_e_m']):
            cs['gdf_shft_e_m'] = cs['gdf_shft_e_pix'] * spy_ps_e
        if pd.isnull(cs['gdf_shft_n_pix']) and pd.notnull(cs['gdf_shft_n_m']):
            cs['gdf_shft_n_pix'] = int(cs['gdf_shft_n_m'] / spy_ps_e)
        elif pd.notnull(cs['gdf_shft_n_pix']) and pd.isnull(cs['gdf_shft_n_m']):
            cs['gdf_shft_n_m'] = cs['gdf_shft_n_pix'] * spy_ps_e
        # Alley size
        if (pd.isnull(cs['alley_size_e_pix']) and
                pd.notnull(cs['alley_size_e_m'])):
            cs['alley_size_e_pix'] = int(cs['alley_size_e_m'] / spy_ps_e)
        elif (pd.notnull(cs['alley_size_e_pix']) and
                  pd.isnull(cs['alley_size_e_m'])):
            cs['alley_size_e_m'] = cs['alley_size_e_pix'] * spy_ps_e
        if (pd.isnull(cs['alley_size_n_pix']) and
                pd.notnull(cs['alley_size_n_m'])):
            cs['alley_size_n_pix'] = int(cs['alley_size_n_m'] / spy_ps_n)
        elif (pd.notnull(cs['alley_size_n_pix']) and
                  pd.isnull(cs['alley_size_n_m'])):
            cs['alley_size_n_m'] = cs['alley_size_n_pix'] * spy_ps_n
        self.crop_specs = cs
        return cs

    def _composite_band_setup(self, base_dir_out, fname, folder_name, name_append):
         '''
         '''
         if base_dir_out is None:
             base_dir = os.path.dirname(fname)
             dir_out, name_append = self._save_file_setup(
                     base_dir, folder_name, name_append)
         else:
             dir_out, name_append = self._save_file_setup(
                     base_dir_out, folder_name, name_append)
         name_print = self._get_name_print()
         return dir_out, name_print, name_append

    def _band_math_setup(self, base_dir_out, folder_name, fname, name_append,
                         method):
        '''
        '''
        msg = ('``method`` must be one of either "ndi", "ratio", "derivative", '
               'or "mcari2".\n')
        assert method in ['ndi', 'ratio', 'derivative', 'mcari2'], msg

        if base_dir_out is None:
            base_dir = os.path.dirname(fname)
            dir_out, name_append = self._save_file_setup(
                    base_dir, folder_name, name_append)
        else:
            dir_out, name_append = self._save_file_setup(
                    base_dir_out, folder_name, name_append)
        name_print = self._get_name_print()
        return dir_out, name_print, name_append

        # if method == 'ndi':
        #     print('Calculating normalized difference index for: {0}'
        #           ''.format(name_print))
        # elif method == 'ratio':
        #     print('Calculating simple ratio index for: {0}'
        #           ''.format(name_print))
        # elif method == 'mcari2':
        #     print('Calculating MCARI2 index for: {0}'
        #           ''.format(name_print))


    def _mask_stats_setup(self, mask_thresh, mask_percentile, mask_side):
        '''
        Parse thesholds and percentiles to dynamically set column names for
        masked df_stats
        '''
        if mask_thresh is not None:
            if not isinstance(mask_thresh, list):
                mask_thresh = [mask_thresh]
            mask_thresh_print = '-'.join([str(x) for x in mask_thresh])
        if mask_percentile is not None:
            if not isinstance(mask_percentile, list):
                mask_percentile = [mask_percentile]
            mask_pctl_print = '-'.join([str(x) for x in mask_percentile])
        if mask_side is not None:
            if not isinstance(mask_side, list):
                mask_side = [mask_side]
            mask_side_print = '-'.join([str(x) for x in mask_side])
        if mask_thresh is not None and mask_percentile is not None:
            type_mask = ('mask-{0}-thresh-{1}-pctl-{2}'.format(
                    mask_side_print, mask_thresh_print, mask_pctl_print))
        elif mask_thresh is not None and mask_percentile is None:
            type_mask = ('mask-{0}-thresh-{1}'.format(
                    mask_side_print, mask_thresh_print))
        elif mask_thresh is None and mask_percentile is not None:
            type_mask = ('mask-{0}-pctl-{1}'.format(
                    mask_side_print, mask_pctl_print))
        columns = ['fname', 'plot_id', type_mask + '-count',
                   type_mask + '-mean', type_mask + '-stdev',
                   type_mask + '-median']
        df_stats = pd.DataFrame(columns=columns)
        return df_stats, type_mask

    def _mask_single_stats(self, fname, array_bm, metadata_bm,
                           mask_thresh, mask_percentile, mask_side, df_stats):
        '''
        Creates the bandmath mask and summarizes the band math values after
        masking unwanted pixels. Returns the single masked bandmath array and
        the stats dataframe with the new image data appended as a row
        '''
        array_mask, metadata_bm = self.io.tools.mask_array(
                array_bm, metadata_bm, thresh=mask_thresh,
                percentile=mask_percentile, side=mask_side)
        # array_mask, metadata_bm = hsbatch.io.tools.mask_array(
        #         array_bandmath1, metadata_bandmath1, thresh=mask_thresh,
        #         percentile=mask_percentile, side=mask_side)

        # stat_mask_count = np.count_nonzero(~np.isnan(array_mask))
        # all nan values should be masked from mask_array() function
        stat_mask_count = array_mask.count()
        stat_mask_mean = array_mask.mean()
        stat_mask_std = array_mask.std()
        stat_mask_med = np.ma.median(array_mask)
        # stat_mask_mean = np.nanmean(array_mask)
        # stat_mask_std = np.nanstd(array_mask)
        # stat_mask_med = np.nanmedian(array_mask)

        data = [fname, self.io.name_plot, stat_mask_count, stat_mask_mean,
                stat_mask_std, stat_mask_med]
        df_stats_temp = pd.DataFrame(data=[data], columns=df_stats.columns)
        df_stats = df_stats.append(df_stats_temp, ignore_index=True)
        return array_mask, df_stats


    def _mask_two_step(self, mask_dir, mask_side, mask_thresh,
                       mask_percentile, fname, df_stats1, df_stats2,
                       name_label):
        '''
        Performs a two-step masking process. The masked masked
        bandmath arrays and stats for each step are returned.
        '''
        msg1 = ('Either ``mask_thresh`` or ``mask_percentile`` is a '
                'list, but ``mask_dir`` is not a list. If trying to '
                'perform a "two-step" masking process, please be sure '
                'to pass a list with length of two for both '
                '``mask_dir`` and ``mask_side``, as well as either '
                'for ``mask_thresh`` or ``mask_percentile``.\n'
                '``mask_dir``: {0}\n``mask_side``: {1}'
                ''.format(mask_dir, mask_side))
        msg2 = ('Either ``mask_thresh`` or ``mask_percentile`` is a '
                'list, but ``mask_side`` is not a list. If trying to '
                'perform a "two-step" masking process, please be sure '
                'to pass a list with length of two for both '
                '``mask_dir`` and ``mask_side``, as well as either '
                'for ``mask_thresh`` or ``mask_percentile``.\n'
                '``mask_dir``: {0}\n``mask_side``: {1}'
                ''.format(mask_dir, mask_side))
        assert isinstance(mask_dir, list), msg1
        assert isinstance(mask_side, list), msg2

        array_bandmath1, metadata_bandmath1 = self._get_array_similar(
                mask_dir[0])
        array_bandmath2, metadata_bandmath2 = self._get_array_similar(
                mask_dir[1])
        if isinstance(mask_thresh, list):
            array_mask1, df_stats1 = self._mask_single_stats(
                fname, array_bandmath1, metadata_bandmath1,
                mask_thresh[0], None, mask_side[0], df_stats1)
            array_mask2, df_stats2 = self._mask_single_stats(
                fname, array_bandmath2, metadata_bandmath2,
                mask_thresh[1], None, mask_side[1], df_stats2)
        elif isinstance(mask_percentile, list):
            array_mask1, df_stats1 = self._mask_single_stats(
                fname, array_bandmath1, metadata_bandmath1, None,
                mask_percentile[0], mask_side[0], df_stats1)
            array_mask2, df_stats2 = self._mask_single_stats(
                fname, array_bandmath2, metadata_bandmath2, None,
                mask_percentile[1], mask_side[1], df_stats2)
        return array_mask1, array_mask2, df_stats1, df_stats2

    def _execute_mask(self, fname_list, mask_dir, base_dir_out, folder_name,
                      name_append, write_datacube, write_spec, write_geotiff,
                      mask_thresh, mask_percentile, mask_side):
        '''
        Actually creates the mask to keep the main function a bit cleaner
        '''
        if mask_side == 'outside':  # thresh/pctl will be a list, so take care of this first
            df_stats1, type_mask1 = self._mask_stats_setup(
                mask_thresh, mask_percentile, mask_side)
            df_stats2 = None
            type_mask2 = None
        # if mask_side is not "outside" and thresh is list, then it's a 2-step
        elif isinstance(mask_thresh, list):
            if not isinstance(mask_side, list):
                maskside = [mask_side, mask_side]  # ensure that mask_side is two parts as well
            df_stats1, type_mask1 = self._mask_stats_setup(mask_thresh[0], None, mask_side[0])
            df_stats2, type_mask2 = self._mask_stats_setup(mask_thresh[1], None, mask_side[1])
        elif isinstance(mask_percentile, list):
            if not isinstance(mask_side, list):
                maskside = [mask_side, mask_side]  # ensure that mask_side is two parts as well
            df_stats1, type_mask1 = self._mask_stats_setup(None, mask_percentile[0], mask_side[0])
            df_stats2, type_mask2 = self._mask_stats_setup(None, mask_percentile[1], mask_side[1])
        else:
            df_stats1, type_mask1 = self._mask_stats_setup(mask_thresh, mask_percentile, mask_side)
            df_stats2 = None
            type_mask2 = None

        fname_list_p = tqdm(fname_list) if self.progress_bar is True else fname_list
        for idx, fname in enumerate(fname_list_p):
            if self.progress_bar is True:
                fname_list_p.set_description('Processing file {0}/{1}'.format(idx, len(fname_list)))
            self.io.read_cube(fname)
            metadata = self.io.spyfile.metadata.copy()
            metadata_geotiff = self.io.spyfile.metadata.copy()
            base_dir = os.path.dirname(fname)
            if base_dir_out is None:
                dir_out, name_append = self._save_file_setup(
                        base_dir, folder_name, name_append)
            else:
                dir_out, name_append = self._save_file_setup(
                        base_dir_out, folder_name, name_append)
            name_print = self._get_name_print()
            name_label = (name_print + name_append + '.' +
                          self.io.defaults.envi_write.interleave)
            if self._file_exists_check(
                    dir_out, name_label, write_datacube=write_datacube,
                    write_spec=write_spec, write_geotiff=write_geotiff) is True:
                continue
            # array = self.io.spyfile.load()
            array = self.io.spyfile.open_memmap()

            if mask_dir is None:
                mask_dir = os.path.join(self.io.base_dir, 'band_math')
            if df_stats2 is not None:
                array_mask1, array_mask2, df_stats1, df_stats2 =\
                    self._mask_two_step(mask_dir, mask_side, mask_thresh,
                                        mask_percentile, fname, df_stats1,
                                        df_stats2, name_label)
                array_mask = np.logical_or(array_mask1.mask,
                                           array_mask2.mask)
            else:  # things are much simpler
                array_bandmath1, metadata_bandmath1 = self._get_array_similar(
                        mask_dir)
                array_mask, df_stats1 = self._mask_single_stats(
                    fname, array_bandmath1, metadata_bandmath1, mask_thresh,
                    mask_percentile, mask_side, df_stats1)
                array_mask = array_mask.mask

            spec_mean, spec_std, datacube_masked = self.io.tools.mean_datacube(
                    array, array_mask)
            self.spec_mean = spec_mean
            self.spec_std = spec_std
            hist_str = (" -> hs_process.batch.segment_create_mask[<"
                        "label: 'mask_thresh?' value:{0}; "
                        "label: 'mask_percentile?' value:{1}; "
                        "label: 'mask_side?' value:{2}>]"
                        "".format(mask_thresh, mask_percentile, mask_side))
            metadata['history'] += hist_str
            metadata_geotiff['history'] += hist_str

            if write_datacube is True:
                self._write_datacube(dir_out, name_label, datacube_masked,
                                     metadata)
            if write_spec is True:
                name_label_spec = (os.path.splitext(name_label)[0] +
                                   '-mean.spec')
                self._write_spec(dir_out, name_label_spec, spec_mean, spec_std,
                                 metadata)

            self.array_mask = array_mask
            if write_geotiff is True:
                self._write_geotiff(array_mask, fname, dir_out, name_label,
                                    metadata_geotiff, self.io.tools)

        if len(df_stats1) > 0:
            fname_stats1 = os.path.join(dir_out, type_mask1 + '.csv')
            df_stats1.to_csv(fname_stats1, index=False)
        if df_stats2 is not None:
            if len(df_stats2) > 0:
                # fname_csv2 = 'mask-stats2.csv'
                fname_stats2 = os.path.join(dir_out, type_mask2 + '.csv')
                df_stats2.to_csv(fname_stats2, index=False)
#            # should we make an option to save a mean spectra as well?
#            # Yes - we aren't required to save intermediate results and do
#            # another batch process..? we get everything done in one shot -
#            # after all, why do we want to do band math if we aren't also
#            # calculating the average of the area (unless cropping hasn't
#            # been perfomed yet)?
#            # No - Keep it simpler and keep batch functions more specific in
#            # their capabilities (e.g., batch.band_math, batch.mask_array,
#            # batch.veg_spectra)
#
    def _write_stats(self, dir_out, df_stats, fname_csv='stats.csv'):
        '''
        Writes df_stats to <dir_out>, ensuring lock is in place if it exists to
        work as expected with parallel processing.
        '''
        fname_stats = os.path.join(dir_out, fname_csv)

        if self.lock is not None:
            with self.lock:
                if os.path.isfile(fname_stats):
                    df_stats_in = pd.read_csv(fname_stats)
                    df_stats = df_stats_in.append(df_stats)
                df_stats.to_csv(fname_stats, index=False)
        else:
            if os.path.isfile(fname_stats):
                df_stats_in = pd.read_csv(fname_stats)
                df_stats = df_stats_in.append(df_stats)
            df_stats.to_csv(fname_stats, index=False)

    def _execute_composite_band(self, fname_list, base_dir_out, folder_name,
                                name_append, write_geotiff, wl1, b1,
                                list_range, plot_out):
        '''
        Actually executes the composit band to keep the main function a bit
        cleaner
        '''
        type_bm = '-comp-{0}'.format(int(np.mean(wl1)))
        columns = ['fname', 'plot_id', 'count', 'mean', 'std_dev', 'median',
                   'pctl_10th', 'pctl_25th', 'pctl_50th', 'pctl_75th',
                   'pctl_90th', 'pctl_95th']
        df_stats = pd.DataFrame(columns=columns)

        fname_list_p = tqdm(fname_list) if self.progress_bar is True else fname_list
        for idx, fname in enumerate(fname_list_p):
            if self.progress_bar is True:
                fname_list_p.set_description('Processing file {0}/{1}'.format(idx, len(fname_list)))
            self.io.read_cube(fname)
            dir_out, name_print, name_append = self._composite_band_setup(
                    base_dir_out, fname, folder_name, name_append)
            self.my_segment = segment(self.io.spyfile)
            name_label = (name_print + name_append + type_bm + '.{0}'
                          ''.format(self.io.defaults.envi_write.interleave))
            if self._file_exists_check(
                    dir_out, name_label, write_datacube=True,
                    write_geotiff=write_geotiff, write_plot=plot_out) is True:
                continue
            array_b1, metadata = self.my_segment.composite_band(
                wl1=wl1, b1=b1, list_range=list_range, print_out=False)

            stat_count = np.count_nonzero(~np.isnan(array_b1))
            stat_mean = np.nanmean(array_b1)
            stat_std = np.nanstd(array_b1)
            stat_med = np.nanmedian(array_b1)
            stat_pctls = np.nanpercentile(array_b1, [10, 25, 50, 75, 90, 95])

            data = [fname, self.io.name_plot, stat_count, stat_mean, stat_std,
                    stat_med, stat_pctls[0], stat_pctls[1], stat_pctls[2],
                    stat_pctls[3], stat_pctls[4], stat_pctls[5]]
            df_stats_temp = pd.DataFrame(data=[data], columns=columns)
            df_stats = df_stats.append(df_stats_temp, ignore_index=True)

            if plot_out is True:
                fname_fig = os.path.join(dir_out,
                                         os.path.splitext(name_label)[0] +
                                         '.png')
                self.io.tools.plot_histogram(
                        array_b1, fname_fig=fname_fig, title=name_print,
                        xlabel=array_b1.upper(), percentile=90, bins=50,
                        fontsize=14, color='#444444')
            metadata['label'] = name_label

            self._write_datacube(dir_out, name_label, array_b1, metadata)
            if write_geotiff is True:
                self._write_geotiff(array_b1, fname, dir_out, name_label,
                                    metadata, self.my_segment.tools)
        if len(df_stats) > 0:
            self._write_stats(dir_out, df_stats, fname_csv=name_append[1:] + '-stats.csv')

    def _execute_band_math(self, fname_list, base_dir_out, folder_name,
                           name_append, write_geotiff, method, wl1, wl2, wl3, b1, b2,
                           b3, list_range, plot_out):
        '''
        Actually executes the band math to keep the main function a bit
        cleaner
        '''
        if method == 'ndi' or method == 'ratio':
            type_bm = ('{0}-{1}-{2}'.format(method, int(np.mean(wl1)),
                                            int(np.mean(wl2))))
        elif method == 'derivative':
            type_bm = ('{0}-{1}-{2}-{3}'.format(method, int(np.mean(wl1)),
                                                int(np.mean(wl2)),
                                                int(np.mean(wl2))))
        elif method == 'mcari2':
            type_bm = ('{0}-{1}-{2}-{3}'.format(method, int(np.mean(wl1)),
                                                int(np.mean(wl2)),
                                                int(np.mean(wl2))))
        columns = ['fname', 'plot_id', 'count', 'mean', 'std_dev', 'median',
                   'pctl_10th', 'pctl_25th', 'pctl_50th', 'pctl_75th',
                   'pctl_90th', 'pctl_95th']
        df_stats = pd.DataFrame(columns=columns)

        fname_list_p = tqdm(fname_list) if self.progress_bar is True else fname_list
        for idx, fname in enumerate(fname_list_p):
            if self.progress_bar is True:
                fname_list_p.set_description('Processing file {0}/{1}'.format(idx, len(fname_list)))
            self.io.read_cube(fname)
            dir_out, name_print, name_append = self._band_math_setup(
                    base_dir_out, folder_name, fname, name_append, method)
            self.my_segment = segment(self.io.spyfile)

            if method == 'ndi':
                name_label = (name_print + name_append + '-{0}-{1}-{2}.{3}'
                              ''.format(method, int(np.mean(wl1)),
                                        int(np.mean(wl2)),
                                        self.io.defaults.envi_write.interleave))
                if self._file_exists_check(
                        dir_out, name_label, write_datacube=True,
                        write_geotiff=write_geotiff,
                        write_plot=plot_out) is True:
                    continue
                array_bm, metadata = self.my_segment.band_math_ndi(
                        wl1=wl1, wl2=wl2, b1=b1, b2=b2, list_range=list_range,
                        print_out=False)
            elif method == 'ratio':
                name_label = (name_print + name_append + '-{0}-{1}-{2}.{3}'
                              ''.format(method, int(np.mean(wl1)),
                                        int(np.mean(wl2)),
                                        self.io.defaults.envi_write.interleave))
                if self._file_exists_check(
                        dir_out, name_label, write_datacube=True,
                        write_geotiff=write_geotiff,
                        write_plot=plot_out) is True:
                    continue
                array_bm, metadata = self.my_segment.band_math_ratio(
                        wl1=wl1, wl2=wl2, b1=b1, b2=b2, list_range=list_range,
                        print_out=False)
            elif method == 'derivative':
                name_label = (name_print + name_append + '-{0}-{1}-{2}-{3}.{4}'
                              ''.format(method, int(np.mean(wl1)),
                                        int(np.mean(wl2)),
                                        int(np.mean(wl3)),
                                        self.io.defaults.envi_write.interleave))
                if self._file_exists_check(
                        dir_out, name_label, write_datacube=True,
                        write_geotiff=write_geotiff,
                        write_plot=plot_out) is True:
                    continue
                array_bm, metadata = self.my_segment.band_math_derivative(
                        wl1=wl1, wl2=wl2, wl3=wl3, b1=b1, b2=b2, b3=b3,
                        list_range=list_range, print_out=False)
            elif method == 'mcari2':
                name_label = (name_print + name_append + '-{0}-{1}-{2}-{3}.{4}'
                              ''.format(method, int(np.mean(wl1)),
                                        int(np.mean(wl2)),
                                        int(np.mean(wl3)),
                                        self.io.defaults.envi_write.interleave))
                if self._file_exists_check(
                        dir_out, name_label, write_datacube=True,
                        write_geotiff=write_geotiff,
                        write_plot=plot_out) is True:
                    continue
                array_bm, metadata = self.my_segment.band_math_mcari2(
                        wl1=wl1, wl2=wl2, wl3=wl3, b1=b1, b2=b2, b3=b3,
                        list_range=list_range, print_out=False)

            stat_count = np.count_nonzero(~np.isnan(array_bm))
            stat_mean = np.nanmean(array_bm)
            stat_std = np.nanstd(array_bm)
            stat_med = np.nanmedian(array_bm)
            stat_pctls = np.nanpercentile(array_bm, [10, 25, 50, 75, 90, 95])

            data = [fname, self.io.name_plot, stat_count, stat_mean, stat_std,
                    stat_med, stat_pctls[0], stat_pctls[1], stat_pctls[2],
                    stat_pctls[3], stat_pctls[4], stat_pctls[5]]
            df_stats_temp = pd.DataFrame(data=[data], columns=columns)
            df_stats = df_stats.append(df_stats_temp, ignore_index=True)

            if plot_out is True:
                fname_fig = os.path.join(dir_out,
                                         os.path.splitext(name_label)[0] +
                                         '.png')
                self.io.tools.plot_histogram(
                        array_bm, fname_fig=fname_fig, title=name_print,
                        xlabel=type_bm.upper(), percentile=90, bins=50,
                        fontsize=14, color='#444444')

            metadata['label'] = name_label

            self._write_datacube(dir_out, name_label, array_bm, metadata)
            if write_geotiff is True:
                self._write_geotiff(array_bm, fname, dir_out, name_label,
                                    metadata, self.my_segment.tools)

        if len(df_stats) > 0:
            self._write_stats(dir_out, df_stats, fname_csv=name_append[1:] + '-stats.csv')


    def _get_ndvi_simple(self, df_class_spec, n_classes, plot_out=True):
        '''
        Find kmeans class with lowest NDVI, which represents the soil class
        '''
        nir_b = self.io.tools.get_band(760)
        re_b = self.io.tools.get_band(715)
        red_b = self.io.tools.get_band(681)
        green_b = self.io.tools.get_band(555)

        nir = df_class_spec.iloc[nir_b]
        re = df_class_spec.iloc[re_b]
        red = df_class_spec.iloc[red_b]
        green = df_class_spec.iloc[green_b]

        df_ndvi = (nir-red)/(nir+red)
        class_soil = df_ndvi[df_ndvi == df_ndvi.min()].index[0]
        class_veg = df_ndvi[df_ndvi == df_ndvi.max()].index[0]
        if plot_out is True:
            df_class_spec['wavelength'] = self.io.tools.meta_bands.values()
            fig, ax = plt.subplots()
            sns.lineplot(data=df_class_spec, ax=ax)
            legend = ax.legend()
            legend.set_title('K-means classes')
            legend.texts[class_soil].set_text('Soil')
            legend.texts[class_veg].set_text('Vegetation')
        return class_soil, class_veg

    def _crop_check_input(self, fname_sheet, fname_list, method):
        '''
        Checks that either `fname_sheet` or `fname_list` were passed (and not
        both)
        '''
        if fname_sheet is not None:
            if isinstance(fname_sheet, pd.DataFrame) and pd.isnull(fname_list):
                df_plots = fname_sheet
            elif os.path.splitext(fname_sheet)[-1] == '.csv' and pd.isnull(fname_list):
                df_plots = pd.read_csv(fname_sheet)
            elif fname_list is not None:
                msg2 = ('Both ``fname_sheet`` and ``fname_list`` were passed. '
                        '``fname_list`` (perhaps from ``base_dir``) will be '
                        'ignored.\n')
                print(msg2)
                if isinstance(fname_sheet, pd.DataFrame):
                    df_plots = fname_sheet
                elif os.path.splitext(fname_sheet)[-1] == '.csv':
                    df_plots = pd.read_csv(fname_sheet)
            return df_plots
        elif pd.isnull(fname_sheet) and all(pd.isnull(fname_list)):
            msg1 = ('Neither ``fname_sheet`` nor ``fname_list`` were passed. '
                    'Please pass one or the other (not both) and run '
                    '``batch.spatial_crop`` again.\n')
            raise TypeError(msg1)
        else:  # fname_list was passed and df_plots will be figured out later
            msg3 = ('``method`` is "single", but ``fname_list`` was passed '
                    'instead of ``fname_sheet``.\n\nIf performing '
                    '``crop_single``, please pass ``fname_sheet``.\n\nIf '
                    'performing ``crop_many_gdf``, please pass ``fname_list`` '
                    '(perhaps via ``base_dir``).\n')
            assert method in ['many_grid', 'many_gdf'], msg3
            return

    def _file_exists_check(self, dir_out, name_label,
                           write_datacube=False, write_spec=False,
                           write_geotiff=False, write_plot=False):
        '''
        Checks if all files to be created exist already; if so, returns True;
        if not, returns False.
        '''
        if self.io.defaults.envi_write.force is True:
            return False

        write_dict = {'write_datacube': write_datacube,
                      'write_spec': write_spec,
                      'write_geotiff': write_geotiff,
                      'write_plot': write_plot}
        ext_dict = {'write_datacube': '.bip',
                    'write_spec': '-mean.spec',
                    'write_geotiff': '.tif',
                    'write_plot': '.png'}

        msg = ('Skipping file - it appears as if this image has already '
               'been processed. Overwrite files by passing out_force=True\n'
               'Filename (short): {0}'.format(name_label))

        for key in write_dict.keys():
            if write_dict[key] is True:
                fname = os.path.splitext(
                    os.path.join(dir_out, name_label))[0] + ext_dict[key]
                if not os.path.isfile(fname):  # if we need the file and it doesn't exist, we're done checking
                    return False
        # if we get here without already exiting, all files should exist
        print(msg)
        return True

    def _crop_loop(self, df_plots, gdf, base_dir_out, folder_name,
                   name_append, write_geotiff):
        '''
        ``df_plots`` is assumed to contain all the necessary information to
        crop *each plot* from an image or from multiple images. In other words,
        _crop_loop() will perform a single cropping procedure (via
        ``spatial_mod.crop_single()``) for each row in ``df_plots``. Thus,
        all the necessary information should be contained in df_plots to run
        crop_single(). This function is not meant for dataframes containing
        information to perform crop_many(), so be sure to hone in on that
        information before passing ``_crop_loop``.
        '''
        df_iter = tqdm(df_plots.iterrows(), total=df_plots.shape[0]) if self.progress_bar is True else df_plots.iterrows()
        for idx, row in df_iter:
            if self.progress_bar is True:
                df_iter.set_description('Processing file {0}/{1}'.format(idx, len(df_plots)))
            cs = self._crop_read_sheet(row)
            fname = os.path.join(cs['directory'], cs['fname'])
            # print('\nSpatially cropping: {0}'.format(fname))
            name_long = cs['name_long']  # ``None`` if it was never set
            plot_id_ref = cs['plot_id_ref']
            name_short = cs['name_short']
            fname_hdr = fname + '.hdr'
            self.io.read_cube(fname_hdr, name_long=name_long,
                              name_plot=plot_id_ref, name_short=name_short)
            self.my_spatial_mod = spatial_mod(
                self.io.spyfile, gdf, base_dir=self.io.base_dir, name_long=self.io.name_long,
                name_short=self.io.name_short)
            self.my_spatial_mod.defaults = self.io.defaults
            if base_dir_out is None:
                dir_out, name_append = self._save_file_setup(
                        cs['directory'], folder_name, name_append)
            else:
                dir_out, name_append = self._save_file_setup(
                        base_dir_out, folder_name, name_append)
            name_print = self._get_name_print()
            name_label = self._get_name_label(row, name_print, name_append)
            if self._file_exists_check(
                    dir_out, name_label, write_datacube=True,
                    write_geotiff=write_geotiff) is True:
                continue

            cs = self._pix_to_mapunit(cs)
            self.cs = cs
#            if method == 'single':
            # print(cs)
            array_crop, metadata = self.my_spatial_mod.crop_single(
                    pix_e_ul=cs['pix_e_ul'], pix_n_ul=cs['pix_n_ul'],
                    crop_e_pix=cs['crop_e_pix'], crop_n_pix=cs['crop_n_pix'],
                    buf_e_pix=cs['buf_e_pix'], buf_n_pix=cs['buf_n_pix'],
                    gdf_shft_e_pix=cs['gdf_shft_e_pix'], gdf_shft_n_pix=cs['gdf_shft_n_pix'],
                    plot_id_ref=plot_id_ref, gdf=gdf)

            fname = os.path.join(cs['directory'], cs['fname'])
            self._write_datacube(dir_out, name_label, array_crop, metadata)
            if write_geotiff is True:
                self._write_geotiff(array_crop, fname, dir_out, name_label,
                                    metadata, self.my_spatial_mod.tools,
                                    show_img=False)

    def _append_cropping_details(self, df_plots_many, row):
        '''
        Appends all "row" columns to df_plots_many so they carry through
        '''
        for col in row.keys():
            if pd.notnull(row[col]) and col not in df_plots_many.columns:
                df_plots_many[col] = row[col]
        return df_plots_many

    def _crop_many_read_row(self, row, gdf, method):
        '''
        Helper function for reading a row of a dataframe with information about
        how to crop an image many times
        '''
        cs = self._crop_read_sheet(row)  # this function creates cs['fname']
        fname_in = os.path.join(cs['directory'], cs['fname'])
        print('Filename: {0}'.format(fname_in))
        name_long = cs['name_long']  # ``None`` if it was never set
        plot_id_ref = cs['plot_id_ref']
        name_short = cs['name_short']
        fname_hdr = fname_in + '.hdr'
        self.io.read_cube(fname_hdr, name_long=name_long,
                          name_plot=plot_id_ref, name_short=name_short)
        self.my_spatial_mod = spatial_mod(
            self.io.spyfile, gdf, base_dir=self.io.base_dir, name_long=self.io.name_long,
            name_short=self.io.name_short)
        self.my_spatial_mod.defaults = self.io.defaults
        if method == 'many_gdf':
            df_plots_many = self._many_gdf(cs)
        elif method == 'many_grid':
            df_plots_many = self._many_grid(cs)
        else:
            msg = ('``method`` must be either "many_gdf" or "many_grid".\n'
                   'Method: {0}'.format(method))
            raise ValueError(msg)
        df_plots_many = self._append_cropping_details(df_plots_many, row)
        return df_plots_many

    def _many_grid(self, cs):
        '''Wrapper to get consice access to ``spatial_mod.crop_many_grid()'''
        df_plots = self.my_spatial_mod.crop_many_grid(
            cs['plot_id_ref'], pix_e_ul=cs['pix_e_ul'], pix_n_ul=cs['pix_n_ul'],
            crop_e_m=cs['crop_e_m'], crop_n_m=cs['crop_n_m'],
            alley_size_n_m=cs['alley_size_n_m'], buf_e_m=cs['buf_e_m'],
            buf_n_m=cs['buf_n_m'], n_plots_x=cs['n_plots_x'],
            n_plots_y=cs['n_plots_y'])
        return df_plots

    def _many_gdf(self, cs):
        '''
        Wrapper to get consice access to ``spatial_mod.crop_many_gdf();
        ``my_spatial_mod`` already has access to ``spyfile`` and ``gdf``, so no
        need to pass them here.

        If the buffer settings are None, but there are default settings for
        them, they are passed here
        '''
        if cs['plot_id_ref'] is None:
            cs['plot_id_ref'] = self.io.defaults.crop_defaults.plot_id_ref
        df_plots = self.my_spatial_mod.crop_many_gdf(
            plot_id_ref=cs['plot_id_ref'], pix_e_ul=cs['pix_e_ul'],
            pix_n_ul=cs['pix_n_ul'],
            crop_e_m=cs['crop_e_m'], crop_n_m=cs['crop_n_m'],
            crop_e_pix=cs['crop_e_pix'], crop_n_pix=cs['crop_n_pix'],
            buf_e_m=cs['buf_e_m'], buf_n_m=cs['buf_n_m'],
            buf_e_pix=cs['buf_e_pix'], buf_n_pix=cs['buf_n_pix'],
            gdf_shft_e_m=cs['gdf_shft_e_m'], gdf_shft_n_m=cs['gdf_shft_n_m'],
            gdf_shft_e_pix=cs['gdf_shft_e_pix'],
            gdf_shft_n_pix=cs['gdf_shft_n_pix'],
            n_plots=cs['n_plots'])
        return df_plots

    def _crop_check_files(self, df_plots):
        '''
        If file already exists and out_force is False, removes that file from
        the df_plots
        '''
        df_plots.reset_index(inplace=True)
        df_plots_out = df_plots.copy()
        for idx, row in df_plots.iterrows():
            fname = os.path.join(row['directory'], row['name_short'] +
                                 row['name_long'] + row['ext'])
            if os.path.isfile(fname):
                df_plots_out.drop(idx, inplace=True)
        df_plots_out.reset_index(inplace=True)
        return df_plots_out

    def _execute_crop(self, fname_sheet, fname_list, base_dir_out, folder_name,
                      name_append, write_geotiff, method, gdf):
        '''
        Actually executes the spatial crop to keep the main function a bit
        cleaner

        Either `fname_sheet` or `fname_list` should be None
        '''
        df_plots = self._crop_check_input(fname_sheet, fname_list, method)
        # if not pd.isnull(df_plots):
        if df_plots is not None and not df_plots.empty:
            if 'date' in df_plots.columns and isinstance(df_plots['date'], str):
                df_plots['date'] = pd.to_datetime(df_plots['date'])
        if method == 'single':
            # self._crop_loop(df_plots)
            self._crop_loop(df_plots, gdf, base_dir_out, folder_name,
                   name_append, write_geotiff)
        elif method == 'many_gdf' and isinstance(df_plots, pd.DataFrame):
            # if user passes a dataframe, just do whatever it says..
            # loop through each row, doing crop_many_gdf() on each row with
            # whatever parameters are passed via the columns..
            # we should assume that each row of df_plots contains an image that
            # should have crop_many_gdf performed on it to create a new
            # dataframe that can be passed to _crop_loop()
            for idx, row in df_plots.iterrows():
                print('Computing information to spatially crop via '
                      '``spatial_mod.crop_many_gdf``:')
                df_plots_many = self._crop_many_read_row(row, gdf, method)
                self.df_plots_many = df_plots_many
                self._crop_loop(df_plots_many, gdf, base_dir_out, folder_name,
                                name_append, write_geotiff)
        elif method == 'many_gdf' and df_plots is None:
            print('Because ``fname_list`` was passed instead of '
                  '``fname_sheet``, there is not a way to infer the study '
                  'name and date. Therefore, "study" and "date" will be '
                  'omitted from the output file name. If you would like '
                  'output file names to include "study" and "date", please '
                  'pass ``fname_sheet`` with "study" and "date" columns.\n')
            for fname_in in fname_list:
                self.io.read_cube(fname_in)
                self.my_spatial_mod = spatial_mod(
                    self.io.spyfile, gdf, base_dir=self.io.base_dir,
                    name_long=self.io.name_long,
                    name_short=self.io.name_short)
                # as long as defaults are set ahead of time, they should carry through
                # e.g., batch.io.defaults.crop_defaults.n_plots = 40 to limit to 40 plots
                self.my_spatial_mod.defaults = self.io.defaults
                df_plots_many = self.my_spatial_mod.crop_many_gdf()
                self._crop_loop(df_plots_many, gdf, base_dir_out, folder_name,
                                name_append, write_geotiff)
        elif method == 'many_grid' and isinstance(df_plots, pd.DataFrame):
            for idx, row in df_plots.iterrows():
                print('\nComputing information to spatially crop via '
                      '``spatial_mod.crop_many_grid``:')
                df_plots_many = self._crop_many_read_row(row, gdf, method)
                self._crop_loop(df_plots_many, gdf, base_dir_out, folder_name,
                                name_append, write_geotiff)
        else:
            msg = ('Either ``method`` or ``df_plots`` are not defined '
                   'correctly. If using "many_grid" method, please be sure '
                   '``df_plots`` is being populated correcty\n\n``method``: '
                   '{0}'.format(method))
            raise ValueError(msg)

    def _write_datacube(self, dir_out, name_label, array, metadata):
        '''
        Writes a datacube to file using ``hsio.write_cube()``
        '''
        metadata['label'] = name_label
        hdr_file = os.path.join(dir_out, name_label + '.hdr')
        self.io.write_cube(hdr_file, array, metadata=metadata,
                           dtype=self.io.defaults.envi_write.dtype,
                           force=self.io.defaults.envi_write.force,
                           ext=self.io.defaults.envi_write.ext,
                           interleave=self.io.defaults.envi_write.interleave,
                           byteorder=self.io.defaults.envi_write.byteorder)

    def _write_geotiff(self, array, fname, dir_out, name_label, metadata,
                       tools, show_img=False):
        metadata['label'] = name_label
        msg = ('Projection and Geotransform information are required for '
               'writing the geotiff. This comes from the input filename, '
               'so please be sure the correct filename is passed to '
               '``fname``.\n')
        assert fname is not None and os.path.isfile(fname), msg
        fname_tif = os.path.join(dir_out,
                                 os.path.splitext(name_label)[0] + '.tif')
        img_ds = self.io._read_envi_gdal(fname_in=fname)
        projection_out = img_ds.GetProjection()
#            geotransform_out = img_ds.GetGeotransform()
        img_ds = None  # I only want to use GDAL when I have to..

        map_set = metadata['map info']
        ul_x_utm = tools.get_meta_set(map_set, 3)
        ul_y_utm = tools.get_meta_set(map_set, 4)
        size_x_m = tools.get_meta_set(map_set, 5)
        size_y_m = tools.get_meta_set(map_set, 6)
        # Note the last pixel size must be negative to begin at upper left
        geotransform_out = [ul_x_utm, size_x_m, 0.0, ul_y_utm, 0.0,
                            -size_y_m]
        self.io.write_tif(fname_tif, spyfile=array,
                          projection_out=projection_out,
                          geotransform_out=geotransform_out,
                          show_img=show_img)

    def _write_spec(self, dir_out, name_label, spec_mean, spec_std,
                    metadata, folder_name=None):
        '''
        folder_name added to support an extra level on foler if desired. If
        left to ``None``, spec will be written to dir_out, but if a str is
        passed, spec will be written to os.path.join(dir_out, str).
        '''
        metadata['label'] = name_label
        if folder_name:  # else just use dir_out as is
            dir_out, _ = self._save_file_setup(dir_out, folder_name, None)

        hdr_file = os.path.join(dir_out, name_label + '.hdr')
        self.io.write_spec(hdr_file, spec_mean, spec_std,
                           dtype=self.io.defaults.envi_write.dtype,
                           force=self.io.defaults.envi_write.force,
                           ext=self.io.defaults.envi_write.ext,
                           interleave=self.io.defaults.envi_write.interleave,
                           byteorder=self.io.defaults.envi_write.byteorder,
                           metadata=metadata)

    def _execute_spec_clip(self, fname_list, base_dir_out, folder_name,
                           name_append, wl_bands):
        '''
        Actually executes the spectral clip to keep the main function a bit
        cleaner
        '''
        fname_list_p = tqdm(fname_list) if self.progress_bar is True else fname_list
        for idx, fname in enumerate(fname_list_p):
            if self.progress_bar is True:
                fname_list_p.set_description('Processing file {0}/{1}'.format(idx, len(fname_list)))
            # print('\nSpectrally clipping: {0}'.format(fname))
            # options for io.read_cube():
            # name_long, name_plot, name_short, individual_plot, overwrite
            self.io.read_cube(fname)
            self.my_spectral_mod = spec_mod(self.io.spyfile)
            base_dir = os.path.dirname(fname)
            if base_dir_out is None:
                dir_out, name_append = self._save_file_setup(
                        base_dir, folder_name, name_append)
            else:
                dir_out, name_append = self._save_file_setup(
                        base_dir_out, folder_name, name_append)
            name_print = self._get_name_print()
            array_clip, metadata = self.my_spectral_mod.spectral_clip(
                    wl_bands=wl_bands)

            name_label = (name_print + name_append + '.' +
                          self.io.defaults.envi_write.interleave)
            metadata['label'] = name_label

            hdr_file = os.path.join(dir_out, name_label + '.hdr')
            self.io.write_cube(hdr_file, array_clip,
                               dtype=self.io.defaults.envi_write.dtype,
                               force=self.io.defaults.envi_write.force,
                               ext=self.io.defaults.envi_write.ext,
                               interleave=self.io.defaults.envi_write.interleave,
                               byteorder=self.io.defaults.envi_write.byteorder,
                               metadata=metadata)

    def _execute_spec_clip_pp(self, fname, base_dir_out, folder_name, name_append, wl_bands):
        '''
        Actually executes the spectral clip to keep the main function a bit
        cleaner
        '''
        # print('Arglist: {0}'.format(arg_list))
        # fname, base_dir_out, folder_name, name_append, wl_bands = arg_list
        print('\nSpectrally clipping: {0}'.format(fname))
        # options for io.read_cube():
        # name_long, name_plot, name_short, individual_plot, overwrite
        self.io.read_cube(fname)
        self.my_spectral_mod = spec_mod(self.io.spyfile)
        base_dir = os.path.dirname(fname)
        if base_dir_out is None:
            dir_out, name_append = self._save_file_setup(
                    base_dir, folder_name, name_append)
        else:
            dir_out, name_append = self._save_file_setup(
                    base_dir_out, folder_name, name_append)
        name_print = self._get_name_print()
        array_clip, metadata = self.my_spectral_mod.spectral_clip(
                wl_bands=wl_bands)

        name_label = (name_print + name_append + '.' +
                      self.io.defaults.envi_write.interleave)
        metadata['label'] = name_label

        hdr_file = os.path.join(dir_out, name_label + '.hdr')
        self.io.write_cube(hdr_file, array_clip,
                           dtype=self.io.defaults.envi_write.dtype,
                           force=self.io.defaults.envi_write.force,
                           ext=self.io.defaults.envi_write.ext,
                           interleave=self.io.defaults.envi_write.interleave,
                           byteorder=self.io.defaults.envi_write.byteorder,
                           metadata=metadata)

    def _execute_spec_combine(self, fname_list, base_dir_out):
        '''
        Actually executes the spectra combine to keep the main function a bit
        cleaner
        '''
        df_specs = None
        if base_dir_out is None:
            base_dir_out = os.path.dirname(fname_list[0])
        pix_n = 0
        for fname in fname_list:
            self.io.read_spec(fname)
            spy_mem = self.io.spyfile_spec.open_memmap()
            pix_n += (np.count_nonzero(~np.isnan(spy_mem)) /
                      self.io.spyfile_spec.nbands)
        print('Combining datacubes/spectra into a single mean spectra.\n'
              'Number of input datacubes/spectra: {0}\nTotal number of '
              'pixels: {1}'
              ''.format(len(fname_list), int(pix_n)))
        for fname in fname_list:
            self.io.read_spec(fname)
            array = self.io.spyfile_spec.load()

            if len(array.shape) == 3:
                pixels = array.reshape((array.shape[0]*array.shape[1]),
                                       array.shape[2])
            else:
                pixels = array.reshape((array.shape[0]), array.shape[2])
            if df_specs is None:
                df_specs = pd.DataFrame(pixels, dtype=float)
            else:
                df_temp = pd.DataFrame(pixels, dtype=float)
                df_specs = df_specs.append(df_temp, ignore_index=True)

        self.df_mean = df_specs.mean()
        self.df_mean = self.df_mean.rename('mean')
        self.df_std = df_specs.std()
        self.df_std = self.df_std.rename('std')
        df_cv = self.df_mean / self.df_std
        df_cv = df_cv.rename('cv')

        hdr_file = os.path.join(base_dir_out, 'spec_mean_spy.spec.hdr')
        self.io.write_spec(hdr_file, self.df_mean, self.df_std,
                           dtype=self.io.defaults.envi_write.dtype,
                           force=self.io.defaults.envi_write.force,
                           ext=self.io.defaults.envi_write.ext,
                           interleave=self.io.defaults.envi_write.interleave,
                           byteorder=self.io.defaults.envi_write.byteorder,
                           metadata=self.io.spyfile_spec.metadata)

    def _execute_spec_mimic(self, fname_list, base_dir_out, folder_name,
                            name_append, sensor, df_band_response, col_wl,
                            center_wl):
        '''
        Actually executes the spectral resample to keep the main function a bit
        cleaner.
        '''
        fname_list_p = tqdm(fname_list) if self.progress_bar is True else fname_list
        for idx, fname in enumerate(fname_list_p):
            if self.progress_bar is True:
                fname_list_p.set_description('Processing file {0}/{1}'
                                             ''.format(idx, len(fname_list)))
            self.io.read_cube(fname)
            self.my_spectral_mod = spec_mod(self.io.spyfile)
            base_dir = os.path.dirname(fname)
            if base_dir_out is None:
                dir_out, name_append = self._save_file_setup(
                        base_dir, folder_name, name_append)
            else:
                dir_out, name_append = self._save_file_setup(
                        base_dir_out, folder_name, name_append)
            name_print = self._get_name_print()
            array_mimic, metadata = self.my_spectral_mod.spectral_mimic(
                    sensor=sensor, df_band_response=df_band_response,
                    col_wl=col_wl, center_wl=center_wl)

            name_label = (name_print + name_append + '.' +
                          self.io.defaults.envi_write.interleave)
            metadata['label'] = name_label

            hdr_file = os.path.join(dir_out, name_label + '.hdr')
            self.io.write_cube(hdr_file, array_mimic,
                               dtype=self.io.defaults.envi_write.dtype,
                               force=self.io.defaults.envi_write.force,
                               ext=self.io.defaults.envi_write.ext,
                               interleave=self.io.defaults.envi_write.interleave,
                               byteorder=self.io.defaults.envi_write.byteorder,
                               metadata=metadata)

    def _execute_spec_resample(self, fname_list, base_dir_out, folder_name,
                               name_append, bandwidth, bins_n):
        '''
        Actually executes the spectral resample to keep the main function a bit
        cleaner.
        '''
        fname_list_p = tqdm(fname_list) if self.progress_bar is True else fname_list
        for idx, fname in enumerate(fname_list_p):
            if self.progress_bar is True:
                fname_list_p.set_description('Processing file {0}/{1}'
                                             ''.format(idx, len(fname_list)))
            self.io.read_cube(fname)
            self.my_spectral_mod = spec_mod(self.io.spyfile)
            base_dir = os.path.dirname(fname)
            if base_dir_out is None:
                dir_out, name_append = self._save_file_setup(
                        base_dir, folder_name, name_append)
            else:
                dir_out, name_append = self._save_file_setup(
                        base_dir_out, folder_name, name_append)
            name_print = self._get_name_print()
            array_bin, metadata = self.my_spectral_mod.spectral_resample(
                    bandwidth=bandwidth, bins_n=bins_n)

            name_label = (name_print + name_append + '.' +
                          self.io.defaults.envi_write.interleave)
            metadata['label'] = name_label

            hdr_file = os.path.join(dir_out, name_label + '.hdr')
            self.io.write_cube(hdr_file, array_bin,
                               dtype=self.io.defaults.envi_write.dtype,
                               force=self.io.defaults.envi_write.force,
                               ext=self.io.defaults.envi_write.ext,
                               interleave=self.io.defaults.envi_write.interleave,
                               byteorder=self.io.defaults.envi_write.byteorder,
                               metadata=metadata)

    def _execute_spec_smooth(self, fname_list, base_dir_out, folder_name,
                             name_append, window_size, order, stats):
        '''
        Actually executes the spectral smooth to keep the main function a bit
        cleaner
        '''
        if stats is True:
            df_smooth_stats = pd.DataFrame(
                    columns=['fname', 'mean', 'std', 'cv'])

        fname_list_p = tqdm(fname_list) if self.progress_bar is True else fname_list
        for idx, fname in enumerate(fname_list_p):
            if self.progress_bar is True:
                fname_list_p.set_description('Processing file {0}/{1}'.format(idx, len(fname_list)))
            # print('\nSpectrally smoothing: {0}'.format(fname))
            self.io.read_cube(fname)
            self.my_spectral_mod = spec_mod(self.io.spyfile)
            base_dir = os.path.dirname(fname)
            if base_dir_out is None:
                dir_out, name_append = self._save_file_setup(
                        base_dir, folder_name, name_append)
            else:
                dir_out, name_append = self._save_file_setup(
                        base_dir_out, folder_name, name_append)
            name_print = self._get_name_print()
            array_smooth, metadata = self.my_spectral_mod.spectral_smooth(
                    window_size=window_size, order=order)

            name_label = (name_print + name_append + '.' +
                          self.io.defaults.envi_write.interleave)
            metadata['label'] = name_label

            hdr_file = os.path.join(dir_out, name_label + '.hdr')
            self.io.write_cube(hdr_file, array_smooth,
                               dtype=self.io.defaults.envi_write.dtype,
                               force=self.io.defaults.envi_write.force,
                               ext=self.io.defaults.envi_write.ext,
                               interleave=self.io.defaults.envi_write.interleave,
                               byteorder=self.io.defaults.envi_write.byteorder,
                               metadata=metadata)

            if stats is True:
                mean = np.nanmean(array_smooth)
                std = np.nanstd(array_smooth)
                cv = std/mean
                df_smooth_temp = pd.DataFrame([[fname, mean, std, cv]],
                                              columns=['fname', 'mean', 'std',
                                                       'cv'])
                df_smooth_stats = df_smooth_stats.append(df_smooth_temp,
                                                         ignore_index=True)

        if stats is True:
            self._write_stats(dir_out, df_smooth_stats, fname_csv=name_append[1:] + '-stats.csv')
            # return df_smooth_stats

    def _execute_spec_smooth_pp(self, fname, base_dir_out, folder_name,
                                name_append, window_size, order, stats):
        '''
        Actually executes the spectral smooth to keep the main function a bit
        cleaner
        '''
        print('\nSpectrally smoothing: {0}'.format(fname))
        if stats is True:
            df_smooth_stats = pd.DataFrame(
                    columns=['fname', 'mean', 'std', 'cv'])

        self.io.read_cube(fname)
        self.my_spectral_mod = spec_mod(self.io.spyfile)
        base_dir = os.path.dirname(fname)
        if base_dir_out is None:
            dir_out, name_append = self._save_file_setup(
                    base_dir, folder_name, name_append)
        else:
            dir_out, name_append = self._save_file_setup(
                    base_dir_out, folder_name, name_append)
        name_print = self._get_name_print()
        array_smooth, metadata = self.my_spectral_mod.spectral_smooth(
                window_size=window_size, order=order)

        name_label = (name_print + name_append + '.' +
                      self.io.defaults.envi_write.interleave)
        metadata['label'] = name_label

        hdr_file = os.path.join(dir_out, name_label + '.hdr')
        self.io.write_cube(hdr_file, array_smooth,
                           dtype=self.io.defaults.envi_write.dtype,
                           force=self.io.defaults.envi_write.force,
                           ext=self.io.defaults.envi_write.ext,
                           interleave=self.io.defaults.envi_write.interleave,
                           byteorder=self.io.defaults.envi_write.byteorder,
                           metadata=metadata)

        if stats is True:
            mean = np.nanmean(array_smooth)
            std = np.nanstd(array_smooth)
            cv = std/mean
            df_smooth_temp = pd.DataFrame([[fname, mean, std, cv]],
                                          columns=['fname', 'mean', 'std',
                                                   'cv'])
            df_smooth_stats = df_smooth_stats.append(df_smooth_temp,
                                                     ignore_index=True)
            return df_smooth_stats

    def _get_fname_similar(self, name_to_match, base_dir, search_ext='bip',
                           level=0):
        '''
        Gets a similar filename from another directory
        '''
        fname_list = self._recurs_dir(base_dir, search_ext=search_ext,
                                      level=level)
        fname_similar = []
        for fname in fname_list:
            f = os.path.split(fname)[-1]
            fname_short = f[:f.find('-')]
            # if name_to_match in fname:
            if name_to_match == fname_short:
                fname_similar.append(fname)
        msg1 = ('No files found with a similar name to "{0}". Please be '
                'sure the images are created before continuing (e.g., did '
                'you perform band math yet?)\n\nbase_dir: {1}'
                ''.format(name_to_match, base_dir))
        msg2 = ('Multiple files found with a similar name to {0}. Please '
                'delete files that are not relevant to continue.\n\nbase_dir: '
                '{1}'.format(name_to_match, base_dir))
        assert len(fname_similar) > 0, msg1
        assert len(fname_similar) == 1, msg2
        return fname_similar[0]

    def _get_array_similar(self, dir_search):
        '''
        Retrieves the array from a directory with a similar name to the loaded
        datacube (i.e., there must be a datacube loaded; self.io.spyfile should
        not be ``None``; compares to ``self.io.name_short``).

        Parameters:
            dir_search: directory to search
        '''
        msg = ('Please load a SpyFile prior to using this function')
        assert self.io.spyfile is not None, msg
        if not os.path.isdir(dir_search):
            msg = ('The passed directory does not exist; please pass a valid '
                   'directory path.\nDirectory: {0}'.format(dir_search))
            raise IOError(msg)
        fname_similar = self._get_fname_similar(
                self.io.name_short, dir_search,
                search_ext=self.io.defaults.envi_write.interleave, level=0)
        fpath_similar = os.path.join(dir_search, fname_similar)
        io_mask = hsio()
        io_mask.read_cube(fpath_similar)
        array = io_mask.spyfile.load()
        metadata = io_mask.spyfile.metadata
        return array, metadata

    def _get_class_mask(self, row, filter_cols, n_classes=1):
        '''
        Finds the class with the lowest NDVI in ``row`` and returns the class ID
        to be used to dictate which pixels get masked

        Parameters:
            n_classes (``int``): number of classes to mask; if 1, then will mask
            the minimum ndvi; if more than 1, all classes (default: 1)
        '''
        row_ndvi = row[filter_cols].astype(float)
        row_ndvi = row_ndvi.dropna()
        print(row_ndvi)
        print(n_classes)
        if len(row_ndvi) == n_classes:
            n_classes -= 1
        row_ndvi_small = row_ndvi.nsmallest(n=n_classes)
        class_name = row_ndvi_small.index.values.tolist()
        class_mask = []
        for name in class_name:
            class_int = int(re.search(r'\d+', name).group())
            class_mask.append(class_int)
        return class_mask

    def _recurs_dir(self, base_dir, search_ext='.csv', level=None):
        '''
        Searches all folders and subfolders recursively within <base_dir>
        for filetypes of <search_exp>.
        Returns sorted <outFiles>, a list of full path strings of each result.

        Parameters:
            base_dir: directory path that should include files to be returned
            search_ext: file format/extension to search for in all directories
                and subdirectories
            level: how many levels to search; if None, searches all levels

        Returns:
            out_files: include the full pathname, filename, and ext of all
                files that have ``search_exp`` in their name.
        '''
        if level is None:
            level = 1
        else:
            level -= 1
        d_str = os.listdir(base_dir)
        out_files = []
        for item in d_str:
            full_path = os.path.join(base_dir, item)
            if not os.path.isdir(full_path) and item.endswith(search_ext):
                out_files.append(full_path)
            elif os.path.isdir(full_path) and level >= 0:
                new_dir = full_path  # If dir, then search in that
                out_files_temp = self._recurs_dir(new_dir, search_ext)
                if out_files_temp:  # if list is not empty
                    out_files.extend(out_files_temp)  # add items
        return sorted(out_files)

    def _save_file_setup(self, base_dir_out, folder_name, name_append):
        '''
        Basic setup items when saving manipulated image files to disk

        Parameters:
            base_dir_out (``str``): Parent directory that all processed datacubes
                will be saved.
            folder_name (``str`` or ``None``): Folder to add to
                ``base_dir_out`` to save all the processed datacubes.
            name_append (``str``): name to append to the filename.
        '''
#        if base_dir_out is None:
#            base_dir_out = os.path.join(self.base_dir, folder_name)
        if pd.isnull(folder_name):
            folder_name = ''
        dir_out = os.path.join(base_dir_out, folder_name)
        if not os.path.isdir(dir_out):
            os.mkdir(dir_out)
        if name_append is None:
            name_append = ''
        else:
            if name_append[0] != '-':
                name_append = '-' + str(name_append)
        return dir_out, name_append

    def _get_name_label(self, row, name_print, name_append):
        '''
        Gets the name label for the datacube. By doing this before the
        processing operation, we can check to see if the datacube has
        already been processed and pass over if desired.
        '''
        if 'study' in row.index:
            if row['study'] is not None:
                name_study = 'study_' + str(row['study'] + '_')
        else:
            name_study = ''
        if 'date' in row.index:
            if row['date'] is not None:
                if isinstance(row['date'], str):
                    row['date'] = pd.to_datetime(row['date'])
                name_date = ('date_' + str(row['date'].year).zfill(4) +
                             str(row['date'].month).zfill(2) +
                             str(row['date'].day).zfill(2) + '_')
        else:
            name_date = ''
        if row['plot_id_ref'] is not None:
            name_plot = 'plot_' + str(row['plot_id_ref'])
        else:
            name_plot = ''
        if ((len(name_study) >= 1) and (len(name_date) >= 1) and
            (len(name_plot) >= 1)):  # then remove the name_print variable
            name_label = (name_study + name_date + name_plot + name_append +
                          '.' + self.io.defaults.envi_write.interleave)
        else:
            name_label1 = (name_print + '_' + name_study + name_date +
                           name_plot)
            if name_label1[-1] == '_':
                name_label1 = name_label1[:-1]
            name_label = (name_label1 + name_append + '.' +
                          self.io.defaults.envi_write.interleave)
        return name_label

    def _get_name_print(self, fname_in=None):
        '''

        '''
        name_print = self.io.name_short
        if name_print is None and fname_in is not None:
            base_name = os.path.basename(fname_in)
            name_print = base_name[:base_name.find('-', base_name.rfind('_'))]
        msg = ('Could not get a name for input datacube.\n')
        assert name_print is not None, msg
        return name_print

    def _read_spectra_from_file(self, fname, columns):
        '''
        Reads a single spectra from file
        '''
        self.io.read_spec(fname + '.hdr')
        meta_bands = self.io.tools.meta_bands
        array = self.io.spyfile_spec.load()
        data = list(np.reshape(array, (array.shape[2])) * 100)
        data.insert(0, self.io.name_plot)
        data.insert(0, os.path.basename(fname))
        df_spec_file = pd.DataFrame(data=[data], columns=columns)
        return df_spec_file

    def _print_progress(self, iteration, total, prefix='', suffix='',
                       decimals=1, bar_length=100):
        """
        Call in a loop to create terminal progress bar
        @params:
            iteration   - Required  : current iteration (Int)
            total       - Required  : total iterations (Int)
            prefix      - Optional  : prefix string (Str)
            suffix      - Optional  : suffix string (Str)
            decimals    - Optional  : positive number of decimals in percent complete (Int)
            bar_length  - Optional  : character length of bar (Int)
        """
        str_format = "{0:." + str(decimals) + "f}"
        percents = str_format.format(100 * (iteration / float(total)))
        filled_length = int(round(bar_length * iteration / float(total)))
        bar = f'{"█" * filled_length}{"-" * (bar_length - filled_length)}'
        sys.stdout.write(f'\r{prefix} |{bar}| {percents}% {suffix}'),
        # sys.stdout.write('%s |%s| %s%s %s\r' % (prefix, bar, percents, '%', suffix))
        if iteration == total:
            sys.stdout.write('\n')
        sys.stdout.flush()

[docs]    def cube_to_spectra(self, fname_list=None, base_dir=None, search_ext='bip',
                        dir_level=0, base_dir_out=None,
                        folder_name='cube_to_spec',
                        name_append='cube-to-spec',
                        write_geotiff=True, out_dtype=False, out_force=None,
                        out_ext=False, out_interleave=False,
                        out_byteorder=False):
        '''
        Calculates the mean and standard deviation for each cube in
        ``fname_list`` and writes the result to a ".spec" file.

        Parameters:
            fname_list (``list``, optional): list of filenames to process; if
                left to ``None``, will look at ``base_dir``, ``search_ext``, and
                ``dir_level`` parameters for files to process (default: ``None``).
            base_dir (``str``, optional): directory path to search for files to
                spectrally clip; if ``fname_list`` is not ``None``, ``base_dir`` will
                be ignored (default: ``None``).
            search_ext (``str``): file format/extension to search for in all
                directories and subdirectories to determine which files to
                process; if ``fname_list`` is not ``None``, ``search_ext`` will
                be ignored (default: 'bip').
            dir_level (``int``): The number of directory levels to search; if
                ``None``, searches all directory levels (default: 0).
            base_dir_out (``str``): directory path to save all processed
                spectra; if set to ``None``, a folder named according to the
                ``folder_name`` parameter is added to ``base_dir``
            folder_name (``str``): folder to add to ``base_dir_out`` to save all
                the processed datacubes (default: 'cube_to_spec').
            name_append (``str``): name to append to the filename (default:
                'cube-to-spec').
            write_geotiff (``bool``): whether to save the masked RGB image as a
                geotiff alongside the masked datacube.
            out_XXX: Settings for saving the output files can be adjusted here
                if desired. They are stored in ``batch.io.defaults``, and are
                therefore accessible at a high level. See
                ``hsio.set_io_defaults()`` for more information on each of the
                settings.

        Note:
            The following ``batch`` example builds on the API example results
            of the `spatial_mod.crop_many_gdf`_ function. Please complete the
            `spatial_mod.crop_many_gdf`_ example to be sure your directory
            (i.e., ``base_dir``) is populated with multiple hyperspectral
            datacubes. The following example will be using datacubes located in
            the following directory:
            ``F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf``

        Example:
            Load and initialize the ``batch`` module, checking to be sure the
            directory exists.

            >>> import os
            >>> from hs_process import batch
            >>> data_dir = r'F:\\nigo0024\Documents\hs_process_demo'
            >>> base_dir = os.path.join(data_dir, 'spatial_mod', 'crop_many_gdf')
            >>> print(os.path.isdir(base_dir))
            >>> hsbatch = batch(base_dir, search_ext='.bip', progress_bar=True)  # searches for all files in ``base_dir`` with a ".bip" file extension
            True

            Use ``batch.cube_to_spectra`` to calculate the *mean* and *standard
            deviation* across all pixels for each of the datacubes in
            ``base_dir``.

            >>> hsbatch.cube_to_spectra(base_dir=base_dir, write_geotiff=False, out_force=True)
            Processing file 39/40: 100%|██████████| 40/40 [00:03<00:00, 13.28it/s]------------------------------------------------| 0.0%


            Use ``seaborn`` to visualize the spectra of plots 1011, 1012, and
            1013. Notice how ``hsbatch.io.name_plot`` is utilized to retrieve
            the plot ID, and how the *"history"* tag is referenced from the
            metadata to determine the number of pixels whose reflectance was
            averaged to create the mean spectra. Also remember that pixels
            across the original input image likely represent a combination of
            soil, vegetation, and shadow.

            >>> import seaborn as sns
            >>> import re
            >>> fname_list = [os.path.join(base_dir, 'cube_to_spec', 'Wells_rep2_20180628_16h56m_pika_gige_7_plot_1011-cube-to-spec-mean.spec'),
                              os.path.join(base_dir, 'cube_to_spec', 'Wells_rep2_20180628_16h56m_pika_gige_7_plot_1012-cube-to-spec-mean.spec'),
                              os.path.join(base_dir, 'cube_to_spec', 'Wells_rep2_20180628_16h56m_pika_gige_7_plot_1013-cube-to-spec-mean.spec')]
            >>> ax = None
            >>> for fname in fname_list:
            >>>     hsbatch.io.read_spec(fname)
            >>>     meta_bands = list(hsbatch.io.tools.meta_bands.values())
            >>>     data = hsbatch.io.spyfile_spec.load().flatten() * 100
            >>>     hist = hsbatch.io.spyfile_spec.metadata['history']
            >>>     pix_n = re.search('<pixel number: (.*)>', hist).group(1)
            >>>     if ax is None:
            >>>         ax = sns.lineplot(x=meta_bands, y=data, label='Plot '+hsbatch.io.name_plot+' (n='+pix_n+')')
            >>>     else:
            >>>         ax = sns.lineplot(x=meta_bands, y=data, label='Plot '+hsbatch.io.name_plot+' (n='+pix_n+')', ax=ax)
            >>> ax.set_xlabel('Wavelength (nm)', weight='bold')
            >>> ax.set_ylabel('Reflectance (%)', weight='bold')
            >>> ax.set_title(r'API Example: `batch.cube_to_spectra`', weight='bold')

            .. image:: img/batch/cube_to_spectra.png

        .. _spatial_mod.crop_many_gdf: hs_process.spatial_mod.html#hs_process.spatial_mod.crop_many_gdf
        '''
        self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                                out_byteorder)
        if fname_list is None and base_dir is not None:
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        elif fname_list is None and base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate which '
                   'datacubes should be processed.\n')
            assert base_dir is not None, msg
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

        append_extra = '-mean'
        if self.io.defaults.envi_write.force is False:  # otherwise just overwrites if it exists
            fname_list = self._check_processed(fname_list, base_dir_out,
                                               folder_name, name_append,
                                               append_extra, ext='.spec')

        # pb_i = 0
        # pb_len = len(fname_list)
        # pb_prefix = 'cube_to_spectra:'
        # self._print_progress(pb_i, pb_len, prefix=pb_prefix)

        fname_list_p = tqdm(fname_list) if self.progress_bar is True else fname_list
        for idx, fname in enumerate(fname_list_p):
            if self.progress_bar is True:
                fname_list_p.set_description('Processing file {0}/{1}'.format(idx, len(fname_list)))
            self.io.read_cube(fname)
            base_dir = os.path.dirname(fname)
            if base_dir_out is None:
                dir_out, name_append = self._save_file_setup(
                        base_dir, folder_name, name_append)
            else:
                dir_out, name_append = self._save_file_setup(
                        base_dir_out, folder_name, name_append)
            name_print = self._get_name_print()
            name_label = (name_print + name_append + append_extra + '.' +
                          self.io.defaults.envi_write.interleave)
            if self._file_exists_check(
                    dir_out, name_label, write_geotiff=write_geotiff,
                    write_spec=True) is True:
                # self.print_progress(idx+1, pb_len, prefix=pb_prefix)
                continue

            # print('Calculating mean spectra: {0}'.format(fname))
            spec_mean, spec_std, array = self.io.tools.mean_datacube(
                    self.io.spyfile)
            metadata = self.io.spyfile.metadata.copy()
            # because this is specialized, we should make our own history str
            n_pix = self.io.spyfile.nrows * self.io.spyfile.ncols
            hist_str = (' -> hs_process.batch.cube_to_spectra[<pixel number: '
                        '{0}>]'.format(n_pix))
            metadata['history'] += hist_str
            name_label_spec = (os.path.splitext(name_label)[0] +
                               '.spec')
            if write_geotiff is True:
                self._write_geotiff(array, fname, dir_out, name_label,
                                    metadata, self.io.tools)
            # Now write spec (will change map info on metadata)
            self._write_spec(dir_out, name_label_spec, spec_mean, spec_std,
                             metadata)
            # self._print_progress(idx+1, pb_len, prefix=pb_prefix)

[docs]    def segment_composite_band(self, fname_list=None, base_dir=None,
                               search_ext='bip', dir_level=0, base_dir_out=None,
                               folder_name='composite_band',
                               name_append='composite-band',
                               write_geotiff=True, wl1=None, b1=None,
                               list_range=True, plot_out=True,
                               out_dtype=False, out_force=None, out_ext=False,
                               out_interleave=False, out_byteorder=False):
        '''
        Batch processing tool to create a composite band on multiple datacubes
        in the same way. ``batch.segment_composite_band`` is typically used
        prior to  ``batch.segment_create_mask`` to generate the
        images/directory required for the masking process.

        Parameters:
            wl1 (``int``, ``float``, or ``list``): the wavelength (or set of
                wavelengths) to be used as the first parameter of the
                band math index; if ``list``, then consolidates all
                bands between two wavelength values by calculating the mean
                pixel value across all bands in that range (default: ``None``).
            b1 (``int``, ``float``, or ``list``): the band (or set of bands)
                to be used as the first parameter of the band math index; if
                ``list``, then consolidates all bands between two band values
                by calculating the mean pixel value across all bands in that
                range (default: ``None``).
            list_range (``bool``): Whether bands/wavelengths passed as a list
                is interpreted as a range of bands (``True``) or for each
                individual band in the list (``False``). If ``list_range`` is
                ``True``, ``b1``/``wl1`` and ``b2``/``wl2`` should be lists
                with two items, and all bands/wavelegths between the two values
                will be used (default: ``True``).
            plot_out (``bool``): whether to save a histogram of the band math
                result (default: ``True``).
            write_geotiff (``bool``): whether to save the masked RGB image as a
                geotiff alongside the masked datacube.
        '''
        self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                                out_byteorder)
        if fname_list is None and base_dir is not None:
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        elif fname_list is None and base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate which '
                   'datacubes should be processed.\n')
            assert base_dir is not None, msg
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

        # checks filenames
        if self.io.defaults.envi_write.force is False:  # otherwise just overwrites if it exists
            append_extra = '-comp-{0}'.format(int(np.mean(wl1)))
            fname_list = self._check_processed(
                fname_list, base_dir_out, folder_name, name_append, append_extra)

        self._execute_composite_band(fname_list, base_dir_out, folder_name,
                                     name_append, write_geotiff, wl1, b1,
                                     list_range, plot_out)

[docs]    def segment_band_math(self, fname_list=None, base_dir=None,
                          search_ext='bip', dir_level=0, base_dir_out=None,
                          folder_name='band_math', name_append='band-math',
                          write_geotiff=True, method='ndi', wl1=None, wl2=None,
                          wl3=None, b1=None, b2=None, b3=None,
                          list_range=True, plot_out=True,
                          out_dtype=False, out_force=None, out_ext=False,
                          out_interleave=False, out_byteorder=False):
        '''
        Batch processing tool to perform band math on multiple datacubes in the
        same way. ``batch.segment_band_math`` is typically used prior to
        ``batch.segment_create_mask`` to generate the images/directory required
        for the masking process.

        Parameters:
            method (``str``): Must be one of "ndi" (normalized difference
                index), "ratio" (simple ratio index), "derivative"
                (deriviative-type index), or "mcari2" (modified chlorophyll
                absorption index2). Indicates what kind of band math should be
                performed on the input datacube. The "ndi" method leverages
                ``segment.band_math_ndi()``, the "ratio" method leverages
                ``segment.band_math_ratio()``, and the "derivative" method
                leverages ``segment.band_math_derivative()``. Please see the
                ``segment`` documentation for more information (default:
                "ndi").
            wl1 (``int``, ``float``, or ``list``): the wavelength (or set of
                wavelengths) to be used as the first parameter of the
                band math index; if ``list``, then consolidates all
                bands between two wavelength values by calculating the mean
                pixel value across all bands in that range (default: ``None``).
            wl2 (``int``, ``float``, or ``list``): the wavelength (or set of
                wavelengths) to be used as the second parameter of the
                band math index; if ``list``, then consolidates all
                bands between two wavelength values by calculating the mean
                pixel value across all bands in that range (default: ``None``).
            b1 (``int``, ``float``, or ``list``): the band (or set of bands) to be
                used as the first parameter of the band math index;
                if ``list``, then consolidates all bands between two band values
                by calculating the mean pixel value across all bands in that
                range (default: ``None``).
            b2 (``int``, ``float``, or ``list``): the band (or set of bands) to be
                used as the second parameter of the band math
                index; if ``list``, then consolidates all bands between two band
                values by calculating the mean pixel value across all bands in
                that range (default: ``None``).
            list_range (``bool``): Whether bands/wavelengths passed as a list is
                interpreted as a range of bands (``True``) or for each individual
                band in the list (``False``). If ``list_range`` is ``True``,
                ``b1``/``wl1`` and ``b2``/``wl2`` should be lists with two items, and
                all bands/wavelegths between the two values will be used
                (default: ``True``).
            plot_out (``bool``): whether to save a histogram of the band math
                result (default: ``True``).
            write_geotiff (``bool``): whether to save the masked RGB image as a
                geotiff alongside the masked datacube.

        Note:
            The following ``batch`` example builds on the API example results
            of the `spatial_mod.crop_many_gdf`_ function. Please complete the
            `spatial_mod.crop_many_gdf`_ example to be sure your directory
            (i.e., ``base_dir``) is populated with multiple hyperspectral
            datacubes. The following example will be using datacubes located in
            the following directory:
            ``F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf``

        Example:
            Load and initialize the ``batch`` module, checking to be sure the
            directory exists.

            >>> import os
            >>> from hs_process import batch
            >>> base_dir = r'F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf'
            >>> print(os.path.isdir(base_dir))
            True
            >>> hsbatch = batch(base_dir, search_ext='.bip')  # searches for all files in ``base_dir`` with a ".bip" file extension

            Use ``batch.segment_band_math`` to compute the MCARI2 (Modified
            Chlorophyll Absorption Ratio Index Improved; Haboudane et al.,
            2004) spectral index for each of the datacubes in ``base_dir``. See
            `Harris Geospatial`_ for more information about the MCARI2 spectral
            index and references to other spectral indices.

            >>> folder_name = 'band_math_mcari2-800-670-550'  # folder name can be modified to be more descriptive in what type of band math is being performed
            >>> method = 'mcari2'  # must be one of "ndi", "ratio", "derivative", or "mcari2"
            >>> wl1 = 800
            >>> wl2 = 670
            >>> wl3 = 550
            >>> hsbatch.segment_band_math(base_dir=base_dir, folder_name=folder_name,
                                          name_append='band-math', write_geotiff=True,
                                          method=method, wl1=wl1, wl2=wl2, wl3=wl3,
                                          plot_out=True, out_force=True)
            Bands used (``b1``): [198]
            Bands used (``b2``): [135]
            Bands used (``b3``): [77]
            Wavelengths used (``b1``): [799.0016]
            Wavelengths used (``b2``): [669.6752]
            Wavelengths used (``b3``): [550.6128]
            Saving F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\band_math_mcari2-800-670-550\Wells_rep2_20180628_16h56m_pika_gige_7_plot_1011-band-math-mcari2-800-670-550.bip
            ...

            ``batch.segment_band_math`` creates a new folder in ``base_dir``
            (in this case the new directory is
            ``F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\band_math_mcari2-800-670-550``)
            which contains several data products. The **first** is
            ``band-math-stats.csv``: a spreadsheet containing summary
            statistics for each of the image cubes that were processed via
            ``batch.segment_band_math``; stats include *pixel count*,
            *mean*, *standard deviation*, *median*, and *percentiles* across
            all image pixels.

            **Second** is a ``geotiff`` file for each of the image cubes after the
            band math processing. This can be opened in *QGIS* to visualize in
            a spatial reference system, or can be opened using any software
            that supports floating point *.tif* files.

            .. image:: img/batch/segment_band_math_plot_611-band-math-mcari2-800-670-550_tif.png

            **Third** is the band math raster saved in the *.hdr* file format.
            Note that the data conained here should be the same as in the
            *.tif* file, so it's a matter of preference as to what may be more
            useful. This single band *.hdr* can also be opend in *QGIS*.

            **Fourth** is a histogram of the band math data contained in the
            image. The histogram illustrates the 90th percentile value, which
            may be useful in the segmentation step (e.g., see
            `batch.segment_create_mask`_).

            .. image:: img/batch/segment_band_math_plot_611-band-math-mcari2-800-670-550.png

        .. _Harris Geospatial: https://www.harrisgeospatial.com/docs/NarrowbandGreenness.html#Modified3
        .. _batch.segment_create_mask: hs_process.batch.html#hs_process.batch.segment_create_mask
        '''
        self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                                out_byteorder)
        if fname_list is None and base_dir is not None:
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        elif fname_list is None and base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate which '
                   'datacubes should be processed.\n')
            assert base_dir is not None, msg
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        # else fname_list must be passed directly

        if method == 'ndi':
            append_extra = ('-{0}-{1}-{2}'
                            ''.format(method, int(np.mean(wl1)),
                                      int(np.mean(wl2))))
        elif method == 'ratio':
            append_extra = ('-{0}-{1}-{2}'
                            ''.format(method, int(np.mean(wl1)),
                                      int(np.mean(wl2))))
        elif method == 'derivative':
            append_extra = ('-{0}-{1}-{2}-{3}'
                            ''.format(method, int(np.mean(wl1)),
                                      int(np.mean(wl2)),
                                      int(np.mean(wl3))))
        elif method == 'mcari2':
            append_extra = ('-{0}-{1}-{2}-{3}'
                            ''.format(method, int(np.mean(wl1)),
                                      int(np.mean(wl2)),
                                      int(np.mean(wl3))))

        # checks filenames
        if self.io.defaults.envi_write.force is False:  # otherwise just overwrites if it exists
            fname_list = self._check_processed(
                fname_list, base_dir_out, folder_name, name_append, append_extra)
        self._execute_band_math(fname_list, base_dir_out, folder_name,
                                name_append, write_geotiff, method, wl1, wl2,
                                wl3, b1, b2, b3, list_range, plot_out)

[docs]    def segment_create_mask(self, fname_list=None, base_dir=None,
                            search_ext='bip', dir_level=0, mask_dir=None,
                            base_dir_out=None,
                            folder_name='mask', name_append='mask',
                            write_datacube=True, write_spec=True,
                            write_geotiff=True, mask_thresh=None,
                            mask_percentile=None, mask_side='lower',
                            out_dtype=False, out_force=None, out_ext=False,
                            out_interleave=False, out_byteorder=False):
        '''
        Batch processing tool to create a masked array on many datacubes.
        ``batch.segment_create_mask`` is typically used after
        ``batch.segment_band_math`` to mask all the datacubes in a directory
        based on the result of the band math process.

        Parameters:
            mask_thresh (``float`` or ``int``): The value for which to mask the
                array; should be used with ``side`` parameter (default: ``None``).
            mask_percentile (``float`` or ``int``): The percentile of pixels to
                mask; if ``percentile``=95 and ``side``='lower', the lowest 95% of
                pixels will be masked following the band math operation
                (default: ``None``; range: 0-100).
            mask_side (``str``): The side of the threshold for which to apply the
                mask. Must be either 'lower', 'upper', 'outside', or ``None``;
                if 'lower', everything below the threshold will be masked; if
                'outside', the ``thresh`` / ``percentile`` parameter must be
                list-like with two values indicating the lower and upper bounds
                - anything outside of these values will be masked out; if
                ``None``, only the values that exactly match the threshold will
                be masked (default: 'lower').
            geotiff (``bool``): whether to save the masked RGB image as a geotiff
                alongside the masked datacube.

        Note:
            The following ``batch`` example builds on the API example results
            of `spatial_mod.crop_many_gdf`_ and `batch.segment_band_math`_.
            Please complete each of those API examples to be sure your
            directories (i.e., ``base_dir``, and ``mask_dir``) are populated
            with image files. The following example will be masking datacubes
            located in:
            ``F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf``
            based on MCARI2 images located in:
            ``F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\\band_math_mcari2-800-670-550``

        Example:
            Load and initialize the ``batch`` module, ensuring ``base_dir`` is
            a valid directory

            >>> import os
            >>> from hs_process import batch
            >>> base_dir = r'F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf'
            >>> print(os.path.isdir(base_dir))
            True
            >>> hsbatch = batch(base_dir, search_ext='.bip')  # searches for all files in ``base_dir`` with a ".bip" file extension

            There must be a single-band image that will be used to determine
            which datacube pixels are to be masked (determined via the
            ``mask_dir`` parameter). Point to the directory that contains the
            MCARI2 images.

            >>> mask_dir = os.path.join(base_dir, 'band_math_mcari2-800-670-550')
            >>> print(os.path.isdir(mask_dir))
            True

            Indicate how the MCARI2 images should be used to determine which
            hyperspectal pixels are to be masked. The available parameters for
            controlling this are ``mask_thresh``, ``mask_percentile``, and
            ``mask_side``. We will mask out all pixels that fall below the
            MCARI2 90th percentile.

            >>> mask_percentile = 90
            >>> mask_side = 'lower'

            Finally, indicate the folder to save the masked datacubes and
            perform the batch masking via ``batch.segment_create_mask``

            >>> folder_name = 'mask_mcari2_90th'
            >>> hsbatch.segment_create_mask(base_dir=base_dir, mask_dir=mask_dir,
                                            folder_name=folder_name,
                                            name_append='mask-mcari2-90th', write_geotiff=True,
                                            mask_percentile=mask_percentile,
                                            mask_side=mask_side)
            Saving F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th\Wells_rep2_20180628_16h56m_pika_gige_7_plot_1011-mask-mcari2-90th.bip
            Saving F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th\Wells_rep2_20180628_16h56m_pika_gige_7_plot_1011-mask-mcari2-90th-spec-mean.spec
            ...

            .. image:: img/batch/segment_create_mask_inline.png

            ``batch.segment_create_mask`` creates a new folder in ``base_dir``
            named according to the ``folder_name`` parameter
            (in this case the new directory is
            ``F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th``)
            which contains several data products. The **first** is
            ``mask-stats.csv``: a spreadsheet containing the band math
            threshold value for each image file. In this example, the MCARI2
            value corresponding to the 90th percentile is listed.

            +------------+------------+-------------+
            | fname      | plot_id    |lower-pctl-90|
            +============+============+=============+
            | ...        | 1011       | 0.83222     |
            +------------+------------+-------------+
            | ...        | 1012       | 0.81112     |
            +------------+------------+-------------+
            | ...        | 1013       | 0.74394     |
            +------------+------------+-------------+

            ...etc.

            **Second** is a ``geotiff`` file for each of the image cubes after the
            masking procedure. This can be opened in *QGIS* to visualize in
            a spatial reference system, or can be opened using any software
            that supports floating point *.tif* files. The masked pixels are
            saved as ``null`` values and should render transparently.

            .. image:: img/batch/segment_create_mask_geotiff.png

            **Third** is the full hyperspectral datacube, also with the masked
            pixels saved as ``null`` values. Note that the only pixels
            remaining are the 10% with the highest MCARI2 values.

            .. image:: img/batch/segment_create_mask_datacube.png

            **Fourth** is the mean spectra across the unmasked datacube pixels.
            This is illustrated above by the green line plot (the light green
            shadow represents the standard deviation for each band).

        .. _Harris Geospatial: https://www.harrisgeospatial.com/docs/NarrowbandGreenness.html#Modified3
        .. _batch.segment_band_math: hs_process.batch.html#hs_process.batch.segment_band_math
        '''
        self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                                out_byteorder)
        if fname_list is None and base_dir is not None:
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        elif fname_list is None and base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate which '
                   'datacubes should be processed.\n')
            assert base_dir is not None, msg
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

        if self.io.defaults.envi_write.force is False:  # otherwise just overwrites if it exists
            fname_list = self._check_processed(fname_list, base_dir_out,
                                               folder_name, name_append)
        self._execute_mask(fname_list, mask_dir, base_dir_out, folder_name,
                           name_append, write_datacube, write_spec,
                           write_geotiff, mask_thresh, mask_percentile,
                           mask_side)

[docs]    def spatial_crop(self, fname_sheet=None, base_dir=None, search_ext='bip',
                     dir_level=0, base_dir_out=None,
                     folder_name='spatial_crop', name_append='spatial-crop',
                     write_geotiff=True, method='single', gdf=None, out_dtype=False,
                     out_force=None, out_ext=False, out_interleave=False,
                     out_byteorder=False):
        '''
        Iterates through a spreadsheet that provides necessary information
        about how each image should be cropped and how it should be saved.

        If ``gdf`` is passed (a geopandas.GoeDataFrame polygon file), the
        cropped images will be shifted to the center of appropriate 'plot_id'
        polygon.

        Parameters:
            fname_sheet (``fname``, ``pandas.DataFrame``, or ``None``, optional):
                The filename of the spreadsheed that provides the
                necessary information for fine-tuning the batch process
                cropping. See below for more information about the required and
                optional contents of ``fname_sheet`` and how to properly format
                it. Optionally, ``fname_sheet`` can be a ``Pandas.DataFrame``.
                If left to ``None``, ``base_dir`` and ``gdf`` must be passed.
            base_dir (``str``, optional): directory path to search for files to
                spatially crop; if ``fname_sheet`` is not ``None``,
                ``base_dir`` will be ignored (default: ``None``).
            base_dir_out (``str``, optional): output directory of the cropped
                image (default: ``None``).
            folder_name (``str``, optional): folder to add to ``base_dir_out``
                to save all the processed datacubes (default: 'spatial_crop').
            name_append (``str``, optional): name to append to the filename
                (default: 'spatial-crop').
            write_geotiff (``bool``, optional): whether to save an RGB image as
                a geotiff alongside the cropped datacube.
            method (``str``, optional): Must be one of "single" or
                "many_gdf". Indicates whether a single plot should be cropped
                from the input datacube or if many/multiple plots should be
                cropped from the input datacube. The "single" method leverages
                `spatial_mod.crop_single()`_ and the "many_gdf" method
                leverages `spatial_mod.crop_many_gdf()`_. Please
                see the ``spatial_mod`` documentation for more information
                (default: "single").
            gdf (``geopandas.GeoDataFrame``, optional): the plot names and
                polygon geometery of each of the plots; 'plot_id' must be used as
                a column name to identify each of the plots, and should be an
                integer.
            out_XXX: Settings for saving the output files can be adjusted here
                if desired. They are stored in ``batch.io.defaults``, and are
                therefore accessible at a high level. See
                `hsio.set_io_defaults()`_ for more information on each of the
                settings.

        **Tips and Tricks for** ``fname_sheet`` **when** ``gdf`` **is not passed**

        If ``gdf`` is not passed, ``fname_sheet`` may have the following
        required column headings that correspond to the relevant parameters in
        `spatial_mod.crop_single()`_ and `spatial_mod.crop_many_gdf()`_:

        #. "directory"
        #. "name_short"
        #. "name_long"
        #. "ext"
        #. "pix_e_ul"
        #. "pix_n_ul".

        With this minimum input, ``batch.spatial_crop`` will read in each
        image, crop from the upper left pixel (determined as
        ``pix_e_ul``/``pix_n_ul``) to the lower right pixel calculated
        based on ``crop_e_pix``/``crop_n_pix`` (which is the width of the
        cropped area in units of pixels).

        Note:
            ``crop_e_pix`` and ``crop_n_pix`` have default values (see
            `defaults.crop_defaults()`_), but they can also be passed
            specifically for each datacube by including appropriate columns in
            ``fname_sheet`` (which takes precedence over
            ``defaults.crop_defaults``).

        ``fname_sheet`` may also have the following optional column headings:

        #. "crop_e_pix"
        #. "crop_n_pix"
        #. "crop_e_m"
        #. "crop_n_m"
        #. "buf_e_pix"
        #. "buf_n_pix"
        #. "buf_e_m"
        #. "buf_n_m"
        #. "gdf_shft_e_m"
        #. "gdf_shft_n_m"
        #. "plot_id_ref"
        #. "study"
        #. "date"

        **More** ``fname_sheet`` **Tips and Tricks**

        #. These optional inputs passed via ``fname_sheet`` allow more control
           over exactly how the images are to be cropped. For a more detailed
           explanation of the information that many of these columns are
           intended to contain, see the documentation for
           `spatial_mod.crop_single()`_ and `spatial_mod.crop_many_gdf()`_.
           Those parameters not referenced should be apparent in the API
           examples and tutorials.

        #. If the column names are different in ``fname_sheet`` than described
           here, `defaults.spat_crop_cols()`_ can be modified to indicate which
           columns correspond to the relevant information.

        #. The *date* and *study* columns do not impact how the datacubes are
           to be cropped, but if this information exists,
           ``batch.spatial_crop`` adds it to the filename of the cropped
           datacube. This can be used to avoid overwriting datacubes with
           similar names, and is especially useful when processing imagery from
           many dates and/or studies/locations and saving them in the same
           directory. If "study", "date", and "plot_id" are all passed, this
           information is used to formulate the output file name; e.g.,
           *study_wells_date_20180628_plot_527-spatial-crop.bip*. If either
           "study" or "date" is missing, the populated variables wil be
           appended to the end of the ``hsio.name_short`` string; e.g.,
           *plot_9_3_pika_gige_1_plot_527-spatial-crop.bip*.

        #. Any other columns can be added to ``fname_sheet``, but
           ``batch.spatial_crop()`` does not use them in any way.

        Note:
            The following ``batch`` example only actually processes *a single*
            hyperspectral image. If more datacubes were present in
            ``base_dir``, however, ``batch.spatial_crop`` would process all
            datacubes that were available.

        Note:
            This example uses ``spatial_mod.crop_many_gdf`` to crop many
            plots from a datacube using a polygon geometry file describing the
            spatial extent of each plot.

        Example:

            Load and initialize the ``batch`` module, checking to be sure the
            directory exists.

            >>> import os
            >>> import geopandas as gpd
            >>> import pandas as pd
            >>> from hs_process import batch
            >>> base_dir = r'F:\\nigo0024\Documents\hs_process_demo'
            >>> print(os.path.isdir(base_dir))
            >>> hsbatch = batch(base_dir, search_ext='.bip', dir_level=0,
                                progress_bar=True)  # searches for all files in ``base_dir`` with a ".bip" file extension
            True

            Load the plot geometry as a ``geopandas.GeoDataFrame``

            >>> fname_gdf = r'F:\\nigo0024\Documents\hs_process_demo\plot_bounds.geojson'
            >>> gdf = gpd.read_file(fname_gdf)

            Perform the spatial cropping using the *"many_gdf"* `method`. Note
            that nothing is being passed to `fname_sheet` here, so
            ``batch.spatial_crop`` is simply going to attempt to crop all plots
            contained within `gdf` that overlap with any datacubes in
            ``base_dir``.

            Passing ``fname_sheet`` directly is definitely more flexible for
            customization. However, some customization is possible while not
            passing ``fname_sheet``. In the example below, we set an easting
            and northing buffer, as well as limit the number of plots to crop
            to 40. These defaults trickle through to
            ``spatial_mod.crop_many_gdf()``, so by setting them on the
            ``batch`` object, they will be recognized when calculating crop
            boundaries from ``gdf``.

            >>> hsbatch.io.defaults.crop_defaults.buf_e_m = 2  # Sets buffer in the easting direction (units of meters)
            >>> hsbatch.io.defaults.crop_defaults.buf_n_m = 0.5
            >>> hsbatch.io.defaults.crop_defaults.n_plots = 40  # We can limit the number of plots to process from gdf
            >>> hsbatch.spatial_crop(base_dir=base_dir, method='many_gdf',
                                     gdf=gdf, out_force=True)

            Because ``fname_list`` was passed instead of ``fname_sheet``, there is not a way to infer the study name and date. Therefore, "study" and "date" will be omitted from the output file name. If you would like output file names to include "study" and "date", please pass ``fname_sheet`` with "study" and "date" columns.

            Processing file 39/40: 100%|██████████| 40/40 [00:02<00:00, 17.47it/s]

            .. image:: img/batch/spatial_crop_inline.png

            A new folder was created in ``base_dir``
            - ``F:\\nigo0024\Documents\hs_process_demo\spatial_crop`` - that
            contains the cropped datacubes and the cropped ``geotiff`` images.
            The Plot ID from the ``gdf`` is used to name each datacube
            according to its plot ID. The ``geotiff`` images can be opened in
            *QGIS* to visualize the images after cropping them.

            .. image:: img/batch/spatial_crop_tifs.png

            The cropped images were brightened in *QGIS* to emphasize the
            cropped boundaries. The plot boundaries are overlaid for reference
            (notice the 2.0 m buffer on the East/West ends and the 0.5 m buffer
            on the North/South sides).

        .. _defaults.crop_defaults(): hs_process.defaults.html#hs_process.defaults.crop_defaults
        .. _defaults.spat_crop_cols(): hs_process.defaults.html#hs_process.defaults.spat_crop_cols
        .. _hsio.set_io_defaults(): hs_process.hsio.html#hs_process.hsio.set_io_defaults
        .. _spatial_mod.crop_single(): hs_process.spatial_mod.html#hs_process.spatial_mod.crop_single
        .. _spatial_mod.crop_many_gdf(): hs_process.spatial_mod.html#hs_process.spatial_mod.crop_many_gdf
        '''
        if method == 'many_gdf':
            msg1 = ('Please pass a valid ``geopandas.GeoDataFrame`` if using '
                    'the "many_gdf" method.\n')
            msg2 = ('Please be sure the passed ``geopandas.GeoDataFrame`` has '
                    'a column by the name of "plot_id", indicating the plot '
                    'ID for each polygon geometry if using the "many_gdf" '
                    'method.\n')
            assert isinstance(gdf, gpd.GeoDataFrame), msg1
            assert 'plot_id' in gdf.columns, msg2
        self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                                out_byteorder)

        if fname_sheet is None and base_dir is not None:
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        elif fname_sheet is None and base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate which '
                   'datacubes should be processed.\n')
            assert base_dir is not None, msg
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        else:  # fname_list comes from fname_sheet
            fname_list = None
        # Either fname_sheet or fname_list should be None
        self._execute_crop(fname_sheet, fname_list, base_dir_out,
                           folder_name, name_append, write_geotiff, method, gdf)

[docs]    def spectra_combine(self, fname_list=None, base_dir=None,
                        search_ext='bip', dir_level=0, base_dir_out=None,
                        out_dtype=False, out_force=None, out_ext=False,
                        out_interleave=False, out_byteorder=False):
        '''
        Batch processing tool to gather all pixels from every image in a
        directory, compute the mean and standard deviation, and save as a
        single spectra (i.e., a spectra file is equivalent to a single spectral
        pixel with no spatial information).

        Parameters:
            fname_list (``list``, optional): list of filenames to process; if
                left to ``None``, will look at ``base_dir``, ``search_ext``,
                and ``dir_level`` parameters for files to process (default:
                ``None``).
            base_dir (``str``, optional): directory path to search for files to
                spectrally clip; if ``fname_list`` is not ``None``,
                ``base_dir`` will be ignored (default: ``None``).
            search_ext (``str``): file format/extension to search for in all
                directories and subdirectories to determine which files to
                process; if ``fname_list`` is not ``None``, ``search_ext`` will
                be ignored (default: 'bip').
            dir_level (``int``): The number of directory levels to search; if
                ``None``, searches all directory levels (default: 0).
            base_dir_out (``str``): directory path to save all processed
                datacubes; if set to ``None``, a folder named according to the
                ``folder_name`` parameter is added to ``base_dir`` (default:
                ``None``).
            out_XXX: Settings for saving the output files can be adjusted here
                if desired. They are stored in ``batch.io.defaults, and are
                therefore accessible at a high level. See
                ``hsio.set_io_defaults()`` for more information on each of the
                settings.

        Note:
            The following example will load in several small hyperspectral
            radiance datacubes *(not reflectance)* that were previously cropped
            manually (via Spectronon software). These datacubes represent the
            radiance values of grey reference panels that were placed in the
            field to provide data necessary for converting radiance imagery
            to reflectance. These particular datacubes were extracted
            from several different images captured within ~10 minutes of each
            other.

        Example:
            Load and initialize the ``batch`` module, checking to be sure the
            directory exists.

            >>> import os
            >>> from hs_process import batch
            >>> base_dir = r'F:\\nigo0024\Documents\hs_process_demo\cube_ref_panels'
            >>> print(os.path.isdir(base_dir))
            True
            >>> hsbatch = batch(base_dir)

            Combine all the *radiance* datacubes in the directory via
            ``batch.spectra_combine``.

            >>> hsbatch.spectra_combine(base_dir=base_dir, search_ext='bip',
                                        dir_level=0)
            Combining datacubes/spectra into a single mean spectra.
            Number of input datacubes/spectra: 7
            Total number of pixels: 1516
            Saving F:\\nigo0024\Documents\hs_process_demo\cube_ref_panels\spec_mean_spy.spec

            Visualize the combined spectra by opening in *Spectronon*. The
            solid line represents the mean radiance spectra across all pixels
            and images in ``base_dir``, and the lighter, slightly transparent
            line represents the standard deviation of the radiance across all
            pixels and images in ``base_dir``.

            .. image:: img/batch/spectra_combine.png

            Notice the lower signal at the oxygen absorption region (near 770
            nm). After converting datacubes to reflectance, it may be
            desireable to spectrally clip this region (see
            `spec_mod.spectral_clip()`_)

        .. _spec_mod.spectral_clip(): hs_process.spec_mod.html#hs_process.spec_mod.spectral_clip
        '''
        self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                                out_byteorder)
        if fname_list is None and base_dir is not None:
            msg1 = ('``base_dir`` is not a valid directory.\n')
            assert os.path.isdir(base_dir), msg1
            self.base_dir = base_dir
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        elif fname_list is None and base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg2 = ('Please set ``fname_list`` or ``base_dir`` to indicate '
                    'which datacubes should be processed.\n')
            assert base_dir is not None, msg2
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

        self._execute_spec_combine(fname_list, base_dir_out)

[docs]    def spectra_derivative(
            self, fname_list=None, base_dir=None, search_ext='spec',
            dir_level=0, base_dir_out=None, folder_name='spec_derivative',
            name_append='spec-derivative', order=1,
            out_dtype=False, out_force=None, out_ext=False,
            out_interleave=False, out_byteorder=False):
        '''
        Batch processing tool to calculate the numeric spectral derivative for
        multiple spectra.

        Parameters:
            fname_list (``list``, optional): list of filenames to process; if
                left to ``None``, will look at ``base_dir``, ``search_ext``,
                and ``dir_level`` parameters for files to process (default:
                ``None``).
            base_dir (``str``, optional): directory path to search for files to
                process; if ``fname_list`` is not ``None``, ``base_dir`` will
                be ignored (default: ``None``).
            search_ext (``str``): file format/extension to search for in all
                directories and subdirectories to determine which files to
                process; if ``fname_list`` is not ``None``, ``search_ext`` will
                be ignored (default: 'bip').
            dir_level (``int``): The number of directory levels to search; if
                ``None``, searches all directory levels (default: 0).
            base_dir_out (``str``): directory path to save all processed
                spectra; if set to ``None``, a folder named according to the
                ``folder_name`` parameter is added to ``base_dir``
            folder_name (``str``): folder to add to ``base_dir_out`` to save
                all the processed datacubes (default: 'spec_derivative').
            name_append (``str``): name to append to the filename (default:
                'spec-derivative').
            order (``int``): The order of the derivative (default: 1).
            out_XXX: Settings for saving the output files can be adjusted here
                if desired. They are stored in ``batch.io.defaults, and are
                therefore accessible at a high level. See
                ``hsio.set_io_defaults()`` for more information on each of the
                settings.


        Note:
            The following `batch` example builds on the API example results
            of the `batch.cube_to_spectra`_ function. Please complete
            both the `spatial_mod.crop_many_gdf`_ and
            `batch.cube_to_spectra`_ examples to be sure your directory
            (i.e., `base_dir`) is populated with multiple hyperspectral
            spectra. The following example will be using spectra located in the
            following directory:
            ``F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\cube_to_spec``

        Example:
            Load and initialize the ``batch`` module, checking to be sure the
            directory exists.

            >>> import os
            >>> from hs_process import batch
            >>> data_dir = r'F:\\nigo0024\Documents\hs_process_demo'
            >>> base_dir = os.path.join(data_dir, 'spatial_mod', 'crop_many_gdf', 'cube_to_spec')
            >>> print(os.path.isdir(base_dir))
            >>> hsbatch = batch(base_dir, search_ext='.spec', progress_bar=True)

            Use ``batch.spectra_derivative`` to calculate the central finite
            difference (i.e., the numeric spectral derivative) for each of the
            .spec files in ``base_dir``.

            >>> order = 1
            >>> hsbatch.spectra_derivative(base_dir=base_dir, order=order, out_force=True)

            Use seaborn to visualize the derivative spectra of plots 1011,
            1012, and 1013.

            >>> import seaborn as sns
            >>> import re
            >>> fname_list = [os.path.join(base_dir, 'spec_derivative', 'Wells_rep2_20180628_16h56m_pika_gige_7_plot_1011-spec-derivative-order-{0}.spec'.format(order)),
                              os.path.join(base_dir, 'spec_derivative', 'Wells_rep2_20180628_16h56m_pika_gige_7_plot_1012-spec-derivative-order-{0}.spec'.format(order)),
                              os.path.join(base_dir, 'spec_derivative', 'Wells_rep2_20180628_16h56m_pika_gige_7_plot_1013-spec-derivative-order-{0}.spec'.format(order))]
            >>> ax = None
            >>> for fname in fname_list:
            >>>     hsbatch.io.read_spec(fname)
            >>>     meta_bands = list(hsbatch.io.tools.meta_bands.values())
            >>>     data = hsbatch.io.spyfile_spec.open_memmap().flatten() * 100
            >>>     hist = hsbatch.io.spyfile_spec.metadata['history']
            >>>     pix_n = re.search('<pixel number: (?s)(.*)>] ->', hist).group(1)
            >>>     if ax is None:
            >>>         ax = sns.lineplot(meta_bands, 0, color='gray')
            >>>         ax = sns.lineplot(x=meta_bands, y=data, label='Plot '+hsbatch.io.name_plot+' (n='+pix_n+')')
            >>>     else:
            >>>         ax = sns.lineplot(x=meta_bands, y=data, label='Plot '+hsbatch.io.name_plot+' (n='+pix_n+')', ax=ax)
            >>> ax.set(ylim=(-1, 1))
            >>> ax.set_xlabel('Wavelength (nm)', weight='bold')
            >>> ax.set_ylabel('Derivative reflectance (%)', weight='bold')
            >>> ax.set_title(r'API Example: `batch.spectra_derivative`', weight='bold')

            .. image:: img/batch/spectra_derivative.png

        .. _spatial_mod.crop_many_gdf: hs_process.spatial_mod.html#hs_process.spatial_mod.crop_many_gdf
        .. _batch.cube_to_spectra: hs_process.batch.html#hs_process.batch.cube_to_spectra
        '''
        self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                                out_byteorder)
        if fname_list is None and base_dir is not None:
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        elif fname_list is None and base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate '
                   'which spectra should be processed.\n')
            assert base_dir is not None, msg
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        if name_append:
             name_append = '{0}-order-{1}'.format(name_append, order)

        if self.io.defaults.envi_write.force is False:  # otherwise just overwrites if it exists
            fname_list = self._check_processed(fname_list, base_dir_out,
                                               folder_name, name_append,
                                               ext='.spec')

        fname_list_p = tqdm(fname_list) if self.progress_bar is True else fname_list
        for idx, fname in enumerate(fname_list_p):
            if self.progress_bar is True:
                fname_list_p.set_description('Processing file {0}/{1}'.format(idx, len(fname_list)))
            self.io.read_spec(fname)
            base_dir = os.path.dirname(fname)
            if base_dir_out is None:
                dir_out, name_append = self._save_file_setup(
                        base_dir, folder_name, name_append)
            else:
                dir_out, name_append = self._save_file_setup(
                        base_dir_out, folder_name, name_append)
            name_print = self._get_name_print()
            name_label = (name_print + name_append + '.spec')
            if self._file_exists_check(dir_out, name_label,
                                       write_spec=True) is True:
                continue
            self.my_spectral_mod = spec_mod(self.io.spyfile_spec)
            spec_dydx, metadata_dydx = self.my_spectral_mod.spec_derivative(
                spyfile_spec=self.my_spectral_mod.spyfile, order=order)
            # name_label_spec = (os.path.splitext(name_label)[0] +
            #                    '-derivative-{0}.spec'.format(order))
            # self.spec_dydx = spec_dydx
            # self.metadata_dydx = metadata_dydx
            self._write_spec(dir_out, name_label, spec_dydx,
                             spec_std=None, metadata=metadata_dydx)

[docs]    def spectra_to_csv(self, fname_list=None, base_dir=None, search_ext='spec',
                       dir_level=0, base_dir_out=None, name='stats-spectra',
                       multithread=False):
        '''
        Reads all the ``.spec`` files in a direcory and saves their reflectance
        information to a ``.csv``. ``batch.spectra_to_csv`` is identical to
        ``batch.spectra_to_df`` except a ``.csv`` file is saved rather than
        returning a ``pandas.DataFrame``.

        Parameters:
            fname_list (``list``, optional): list of filenames to process; if
                left to ``None``, will look at ``base_dir``, ``search_ext``, and
                ``dir_level`` parameters for files to process (default: ``None``).
            base_dir (``str``, optional): directory path to search for files to
                spectrally clip; if ``fname_list`` is not ``None``, ``base_dir`` will
                be ignored (default: ``None``).
            search_ext (``str``): file format/extension to search for in all
                directories and subdirectories to determine which files to
                process; if ``fname_list`` is not ``None``, ``search_ext`` will
                be ignored (default: 'bip').
            dir_level (``int``): The number of directory levels to search; if
                ``None``, searches all directory levels (default: 0).
            base_dir_out (``str``): directory path to save all processed
                datacubes; if set to ``None``, file is saved to ``base_dir``
            name (``str``): The output filename (default: "stats-spectra").
            multithread (``bool``): Whether to leverage multi-thread processing
                when reading the .spec files. Setting to ``True`` should speed
                up the time it takes to read all .spec files.

        Note:
            The following example builds on the API example results of
            `batch.segment_band_math()`_ and `batch.segment_create_mask()_.
            Please complete each of those API examples to be sure your
            directory (i.e.,
            ``F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th``)
            is populated with image files.

        Example:
            Load and initialize the ``batch`` module, checking to be sure the
            directory exists.

            >>> import os
            >>> from hs_process import batch
            >>> base_dir = r'F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th'
            >>> print(os.path.isdir(base_dir))
            True
            >>> hsbatch = batch(base_dir)

            Read all the ``.spec`` files in ``base_dir`` and save them to a
            ``.csv`` file.

            >>> hsbatch.spectra_to_csv(base_dir=base_dir, search_ext='spec',
                                       dir_level=0)
            Writing mean spectra to a .csv file.
            Number of input datacubes/spectra: 40
            Output file location: F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th\stats-spectra.csv

            When ``stats-spectra.csv`` is opened in Microsoft Excel, we can see
            that each row is a ``.spec`` file from a different plot, and each
            column is a particular spectral band/wavelength.

            .. image:: img/batch/spectra_to_csv.png

        .. _batch.segment_band_math(): hs_process.batch.html#hs_process.batch.segment_band_math
        .. _batch.segment_create_mask(): hs_process.batch.html#hs_process.batch.segment_create_mask
        '''
        if fname_list is None and base_dir is not None:
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        elif fname_list is None and base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate which '
                   'datacubes should be processed.\n')
            assert base_dir is not None, msg
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        if base_dir_out is None:
            fname_csv = os.path.join(base_dir, name + '.csv')
        else:
            fname_csv = os.path.join(base_dir_out, name + '.csv')
        print('Writing mean spectra to a .csv file.\n'
              'Number of input datacubes/spectra: {0}\nOutput file location: '
              '{1}'.format(len(fname_list), fname_csv))
        # df_spec = None
        # for fname in fname_list:
        #     self.io.read_spec(fname + '.hdr')
        #     meta_bands = self.io.tools.meta_bands
        #     array = self.io.spyfile_spec.load()
        #     data = list(np.reshape(array, (array.shape[2])) * 100)
        #     data.insert(0, self.io.name_plot)
        #     data.insert(0, os.path.basename(fname))
        #     if df_spec is None:
        #         columns = list(meta_bands.values())
        #         columns.insert(0, 'wavelength')
        #         columns.insert(0, np.nan)
        #         bands = list(meta_bands.keys())
        #         bands.insert(0, 'plot_id')
        #         bands.insert(0, 'fname')
        #         df_spec = pd.DataFrame(data=[bands], columns=columns)
        #     df_spec_temp = pd.DataFrame(data=[data], columns=columns)
        #     df_spec = df_spec.append(df_spec_temp)

        # load the data from the Spectral Python (SpyFile) object
        self.io.read_spec(fname_list[0] + '.hdr')  # read first file to build df_spec column headings
        meta_bands = self.io.tools.meta_bands
        columns = list(meta_bands.values())
        columns.insert(0, 'wavelength')
        columns.insert(0, np.nan)
        bands = list(meta_bands.keys())
        bands.insert(0, 'plot_id')
        bands.insert(0, 'fname')
        df_spec = pd.DataFrame(data=[bands], columns=columns)

        # if multithread is True:
        #     with ThreadPoolExecutor() as executor:  # defaults to min(32, os.cpu_count() + 4)
        #         future_df_spec = {
        #             executor.submit(self._read_spectra_from_file,
        #                             fname,
        #                             df_spec.columns): fname for fname in fname_list}
        #         for future in as_completed(future_df_spec):
        #             data = future_df_spec[future]
        #             try:
        #                 df_spec_file = future.result()
        #                 df_spec = df_spec.append(df_spec_file)
        #             except Exception as exc:
        #                 print('%r generated an exception: %s' % (data, exc))
        # else:
        for fname in fname_list:
            df_spec_file = self._read_spectra_from_file(fname, df_spec.columns)
            df_spec = df_spec.append(df_spec_file)
        df_spec.to_csv(fname_csv, index=False)

[docs]    def spectra_to_df(self, fname_list=None, base_dir=None, search_ext='spec',
                      dir_level=0, multithread=False):
        '''
        Reads all the .spec files in a direcory and returns their data as a
        ``pandas.DataFrame`` object. ``batch.spectra_to_df`` is identical to
        ``batch.spectra_to_csv`` except a ``pandas.DataFrame`` is returned
        rather than saving a ``.csv`` file.

        Parameters:
            fname_list (``list``, optional): list of filenames to process; if
                left to ``None``, will look at ``base_dir``, ``search_ext``, and
                ``dir_level`` parameters for files to process (default: ``None``).
            base_dir (``str``, optional): directory path to search for files to
                spectrally clip; if ``fname_list`` is not ``None``, ``base_dir`` will
                be ignored (default: ``None``).
            search_ext (``str``): file format/extension to search for in all
                directories and subdirectories to determine which files to
                process; if ``fname_list`` is not ``None``, ``search_ext`` will
                be ignored (default: 'bip').
            dir_level (``int``): The number of directory levels to search; if
                ``None``, searches all directory levels (default: 0).
            multithread (``bool``): Whether to leverage multi-thread processing
                when reading the .spec files. Setting to ``True`` should speed
                up the time it takes to read all .spec files.

        Note:
            The following example builds on the API example results of
            `batch.segment_band_math()`_ and `batch.segment_create_mask()_.
            Please complete each of those API examples to be sure your
            directory (i.e.,
            ``F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th``)
            is populated with image files.

        Example:
            Load and initialize the ``batch`` module, checking to be sure the
            directory exists.

            >>> import os
            >>> from hs_process import batch
            >>> base_dir = r'F:\\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th'
            >>> print(os.path.isdir(base_dir))
            True
            >>> hsbatch = batch(base_dir)

            Read all the ``.spec`` files in ``base_dir`` and load them to
            ``df_spec``, a ``pandas.DataFrame``.

            >>> df_spec = hsbatch.spectra_to_df(base_dir=base_dir, search_ext='spec',
                                                dir_level=0)
            Writing mean spectra to a ``pandas.DataFrame``.
            Number of input datacubes/spectra: 40

            When visualizing ``df_spe`` in `Spyder`_, we can see that each row
            is a ``.spec`` file from a different plot, and each column is a
            particular spectral band.

            .. image:: img/batch/spectra_to_df.png

            It is somewhat confusing to conceptualize spectral data by band
            number (as opposed to the wavelenth it represents).
            ``hs_process.hs_tools.get_band`` can be used to retrieve
            spectral data for all plots via indexing by wavelength. Say we need
            to access reflectance at 710 nm for each plot.

            >>> df_710nm = df_spec[['fname', 'plot_id', hsbatch.io.tools.get_band(710)]]

            .. image:: img/batch/spectra_to_df_710nm.png

        .. _batch.segment_band_math(): hs_process.batch.html#hs_process.batch.segment_band_math
        .. _batch.segment_create_mask(): hs_process.batch.html#hs_process.batch.segment_create_mask
        .. _Spyder: https://www.spyder-ide.org/
        '''
        if fname_list is None and base_dir is not None:
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        elif fname_list is None and base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate which '
                   'datacubes should be processed.\n')
            assert base_dir is not None, msg
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

        print('Writing mean spectra to a ``pandas.DataFrame``.\n'
              'Number of input datacubes/spectra: {0}'
              ''.format(len(fname_list)))

        # load the data from the Spectral Python (SpyFile) object
        # df_spec = None
        # for fname in fname_list:
        #     self.io.read_spec(fname + '.hdr')
        #     meta_bands = self.io.tools.meta_bands
        #     array = self.io.spyfile_spec.load()
        #     data = list(np.reshape(array, (array.shape[2])))
        #     data.insert(0, self.io.name_plot)
        #     data.insert(0, os.path.basename(fname))
        #     if df_spec is None:
        #         bands = list(meta_bands.keys())
        #         bands.insert(0, 'plot_id')
        #         bands.insert(0, 'fname')
        #         df_spec = pd.DataFrame(columns=bands)
        #     df_spec_temp = pd.DataFrame(data=[data], columns=bands)
        #     df_spec = df_spec.append(df_spec_temp)


        # read first file to build df_spec column headings
        self.io.read_spec(fname_list[0] + '.hdr')
        meta_bands = self.io.tools.meta_bands
        bands = list(meta_bands.keys())
        bands.insert(0, 'plot_id')
        bands.insert(0, 'fname')
        df_spec = pd.DataFrame(columns=bands)
        # if multithread is True:
        #     with ThreadPoolExecutor() as executor:  # defaults to min(32, os.cpu_count() + 4)
        #         future_df_spec = {
        #             executor.submit(self._read_spectra_from_file,
        #                             fname,
        #                             df_spec.columns): fname for fname in fname_list}
        #         for future in as_completed(future_df_spec):
        #             data = future_df_spec[future]
        #             try:
        #                 df_spec_file = future.result()
        #                 df_spec = df_spec.append(df_spec_file)
        #             except Exception as exc:
        #                 print('%r generated an exception: %s' % (data, exc))
        # else:
        for fname in fname_list:
            df_spec_file = self._read_spectra_from_file(fname, df_spec.columns)
            df_spec = df_spec.append(df_spec_file)

        try:
            df_spec['plot_id'] = pd.to_numeric(df_spec['plot_id'])
        except ValueError:
            print('Unable to convert "plot_id" column to numeric type.\n')
        return df_spec.reset_index(drop=True)

[docs]    def spectral_clip(self, fname_list=None, base_dir=None, search_ext='bip',
                      dir_level=0, base_dir_out=None, folder_name='spec_clip',
                      name_append='spec-clip',
                      wl_bands=[[0, 420], [760, 776], [813, 827], [880, 1000]],
                      out_dtype=False, out_force=None, out_ext=False,
                      out_interleave=False, out_byteorder=False):
        '''
        Batch processing tool to spectrally clip multiple datacubes in the same
        way.

        Parameters:
            fname_list (``list``, optional): list of filenames to process; if
                left to ``None``, will look at ``base_dir``, ``search_ext``, and
                ``dir_level`` parameters for files to process (default: ``None``).
            base_dir (``str``, optional): directory path to search for files to
                spectrally clip; if ``fname_list`` is not ``None``, ``base_dir`` will
                be ignored (default: ``None``).
            search_ext (``str``): file format/extension to search for in all
                directories and subdirectories to determine which files to
                process; if ``fname_list`` is not ``None``, ``search_ext`` will
                be ignored (default: 'bip').
            dir_level (``int``): The number of directory levels to search; if
                ``None``, searches all directory levels (default: 0).
            base_dir_out (``str``): directory path to save all processed
                datacubes; if set to ``None``, a folder named according to the
                ``folder_name`` parameter is added to ``base_dir``
            folder_name (``str``): folder to add to ``base_dir_out`` to save all
                the processed datacubes (default: 'spec-clip').
            name_append (``str``): name to append to the filename (default:
                'spec-clip').
            wl_bands (``list`` or ``list of lists``): minimum and maximum
                wavelenths to clip from image; if multiple groups of
                wavelengths should be cut, this should be a list of lists. For
                example, wl_bands=[760, 776] will clip all bands greater than
                760.0 nm and less than 776.0 nm;
                wl_bands = [[0, 420], [760, 776], [813, 827], [880, 1000]]
                will clip all band less than 420.0 nm, bands greater than 760.0
                nm and less than 776.0 nm, bands greater than 813.0 nm and less
                than 827.0 nm, and bands greater than 880 nm (default).
            out_XXX: Settings for saving the output files can be adjusted here
                if desired. They are stored in ``batch.io.defaults, and are
                therefore accessible at a high level. See
                ``hsio.set_io_defaults()`` for more information on each of the
                settings.

        Note:
            The following ``batch`` example builds on the API example results
            of the `batch.spatial_crop`_ function. Please complete the
            `batch.spatial_crop`_ example to be sure your directory
            (i.e., ``base_dir``) is populated with multiple hyperspectral
            datacubes. The following example will be using datacubes located in
            the following directory:
            ``F:\\nigo0024\Documents\hs_process_demo\spatial_crop``

        Example:
            Load and initialize the ``batch`` module, checking to be sure the
            directory exists.

            >>> import os
            >>> from hs_process import batch
            >>> base_dir = r'F:\\nigo0024\Documents\hs_process_demo\spatial_crop'
            >>> print(os.path.isdir(base_dir))
            True
            >>> hsbatch = batch(base_dir, search_ext='.bip', progress_bar=True)  # searches for all files in ``base_dir`` with a ".bip" file extension

            Use ``batch.spectral_clip`` to clip all spectral bands below
            *420 nm* and above *880 nm*, as well as the bands near the oxygen
            absorption (i.e., *760-776 nm*) and water absorption
            (i.e., *813-827 nm*) regions.

            >>> hsbatch.spectral_clip(base_dir=base_dir, folder_name='spec_clip',
                                      wl_bands=[[0, 420], [760, 776], [813, 827], [880, 1000]],
                                      out_force=True)
            Processing 40 files. If this is not what is expected, please check if files have already undergone processing. If existing files should be overwritten, be sure to set the ``out_force`` parameter.
            Processing file 39/40: 100%|██████████| 40/40 [00:01<00:00, 26.68it/s]

            Use ``seaborn`` to visualize the spectra of a single pixel in one
            of the processed images.

            >>> import seaborn as sns
            >>> fname = os.path.join(base_dir, 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-spatial-crop.bip')
            >>> hsbatch.io.read_cube(fname)
            >>> spy_mem = hsbatch.io.spyfile.open_memmap()  # datacube before clipping
            >>> meta_bands = list(hsbatch.io.tools.meta_bands.values())
            >>> fname = os.path.join(base_dir, 'spec_clip', 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-spec-clip.bip')
            >>> hsbatch.io.read_cube(fname)
            >>> spy_mem_clip = hsbatch.io.spyfile.open_memmap()  # datacube after clipping
            >>> meta_bands_clip = list(hsbatch.io.tools.meta_bands.values())
            >>> ax = sns.lineplot(x=meta_bands, y=spy_mem[26][29], label='Before spectral clipping', linewidth=3)
            >>> ax = sns.lineplot(x=meta_bands_clip, y=spy_mem_clip[26][29], label='After spectral clipping', ax=ax)
            >>> ax.set_xlabel('Wavelength (nm)', weight='bold')
            >>> ax.set_ylabel('Reflectance (%)', weight='bold')
            >>> ax.set_title(r'API Example: `batch.spectral_clip`', weight='bold')

            .. image:: img/batch/spectral_clip_plot.png

            Notice the spectral areas that were clipped, namely the oxygen and
            water absorption regions (~770 and ~820 nm, respectively). There
            is perhaps a lower *signal:noise* ratio in these regions, which was
            the merit for clipping those bands out.

        .. _batch.spatial_crop: hs_process.batch.html#hs_process.batch.spatial_crop
        '''
        self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                                out_byteorder)
        if fname_list is None and base_dir is not None:
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        elif fname_list is None and base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate which '
                   'datacubes should be processed.\n')
            assert base_dir is not None, msg
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

        if self.io.defaults.envi_write.force is False:  # otherwise just overwrites if it exists
            fname_list = self._check_processed(fname_list, base_dir_out,
                                               folder_name, name_append)

        self._execute_spec_clip(fname_list, base_dir_out, folder_name,
                                name_append, wl_bands)

        # with ThreadPoolExecutor(max_workers=None) as executor:  # defaults to min(32, os.cpu_count() + 4)
        #     future_to_clip = {
        #         executor.submit(self._execute_spec_clip_pp,
        #                         fname,
        #                         base_dir_out, folder_name, name_append, wl_bands): fname for fname in fname_list}


[docs]    def spectral_mimic(
            self, fname_list=None, base_dir=None, search_ext='bip',
            dir_level=0, base_dir_out=None, folder_name='spec_mimic',
            name_append='spec-mimic', sensor='sentinel-2a',
            df_band_response=None, col_wl='wl_nm', center_wl='peak',
            out_dtype=False, out_force=None, out_ext=False,
            out_interleave=False, out_byteorder=False):
        '''
        Batch processing tool to spectrally mimic a multispectral sensor for
        multiple datacubes in the same way.

        Parameters:
            fname_list (``list``, optional): list of filenames to process; if
                left to ``None``, will look at ``base_dir``, ``search_ext``,
                and ``dir_level`` parameters for files to process (default:
                ``None``).
            base_dir (``str``, optional): directory path to search for files to
                spectrally resample; if ``fname_list`` is not ``None``,
                ``base_dir`` will be ignored (default: ``None``).
            search_ext (``str``): file format/extension to search for in all
                directories and subdirectories to determine which files to
                process; if ``fname_list`` is not ``None``, ``search_ext`` will
                be ignored (default: 'bip').
            dir_level (``int``): The number of directory levels to search; if
                ``None``, searches all directory levels (default: 0).
            base_dir_out (``str``): directory path to save all processed
                datacubes; if set to ``None``, a folder named according to the
                ``folder_name`` parameter is added to ``base_dir``
            folder_name (``str``): folder to add to ``base_dir_out`` to save
                all the processed datacubes (default: 'spec_bin').
            name_append (``str``): name to append to the filename (default:
                'spec-bin').
            sensor (``str``): Should be one of
                ["sentera_6x", "micasense_rededge_3", "sentinel-2a",
                "sentinel-2b", "custom"]; if "custom", ``df_band_response``
                and ``col_wl`` must be passed.
            df_band_response (``pd.DataFrame``): A DataFrame that contains the
                transmissivity (%) for each sensor band (as columns) mapped to
                the continuous wavelength values (as rows). Required if
                ``sensor`` is  "custom", ignored otherwise.
            col_wl (``str``): The column of ``df_band_response`` denoting the
                wavlengths (default: 'wl_nm').
            center_wl (``str``): Indicates how the center wavelength of each
                band is determined. If ``center_wl`` is "peak", the point at
                which transmissivity is at its maximum is used as the center
                wavelength. If ``center_wl`` is "weighted", the weighted
                average is used to compute the center wavelength. Must be one
                of ["peak", "weighted"] (``default: "peak"``).
            out_XXX: Settings for saving the output files can be adjusted here
                if desired. They are stored in ``batch.io.defaults, and are
                therefore accessible at a high level. See
                ``hsio.set_io_defaults()`` for more information on each of the
                settings.

        Note:
            The following ``batch`` example builds on the API example results
            of the `batch.spatial_crop`_ function. Please complete the
            `batch.spatial_crop`_ example to be sure your directory
            (i.e., ``base_dir``) is populated with multiple hyperspectral
            datacubes. The following example will be using datacubes located in
            the following directory:
            ``F:\\nigo0024\Documents\hs_process_demo\spatial_crop``

        Example:
            Load and initialize the ``batch`` module, checking to be sure the
            directory exists.

            >>> import os
            >>> from hs_process import batch
            >>> base_dir = r'F:\\nigo0024\Documents\hs_process_demo\spatial_crop'
            >>> print(os.path.isdir(base_dir))
            True
            >>> hsbatch = batch(base_dir, search_ext='.bip', progress_bar=True)  # searches for all files in ``base_dir`` with a ".bip" file extension

            Use ``batch.spectral_mimic`` to spectrally mimic the Sentinel-2A
            multispectral satellite sensor.

            >>> hsbatch.spectral_mimic(
                base_dir=base_dir, folder_name='spec_mimic',
                name_append='sentinel-2a',
                sensor='sentinel-2a', center_wl='weighted')
            Processing 40 files. If existing files should be overwritten, be sure to set the ``out_force`` parameter.
            Processing file 39/40: 100%|██████████| 40/40 [00:04<00:00,  8.85it/s]

            Use ``seaborn`` to visualize the spectra of a single pixel in one
            of the processed images.

            >>> import seaborn as sns
            >>> fname = os.path.join(base_dir, 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-spatial-crop.bip')
            >>> hsbatch.io.read_cube(fname)
            >>> spy_mem = hsbatch.io.spyfile.open_memmap()  # datacube before mimicking
            >>> meta_bands = list(hsbatch.io.tools.meta_bands.values())
            >>> fname = os.path.join(base_dir, 'spec_mimic', 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-sentinel-2a.bip')
            >>> hsbatch.io.read_cube(fname)
            >>> spy_mem_sen2a = hsbatch.io.spyfile.open_memmap()  # datacube after mimicking
            >>> meta_bands_sen2a = list(hsbatch.io.tools.meta_bands.values())
            >>> ax = sns.lineplot(x=meta_bands, y=spy_mem[26][29], label='Hyperspectral (Pika II)', linewidth=3)
            >>> ax = sns.lineplot(x=meta_bands_sen2a, y=spy_mem_sen2a[26][29], label='Sentinel-2A "mimic"', marker='o', ms=6, ax=ax)
            >>> ax.set_xlabel('Wavelength (nm)', weight='bold')
            >>> ax.set_ylabel('Reflectance (%)', weight='bold')
            >>> ax.set_title(r'API Example: `batch.spectral_mimic`', weight='bold')

            .. image:: img/batch/spectral_mimic_sentinel-2a_plot.png

        .. _batch.spatial_crop: hs_process.batch.html#hs_process.batch.spatial_crop
        '''
        self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                                out_byteorder)
        if fname_list is None and base_dir is not None:
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        elif fname_list is None and base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate '
                   'which datacubes should be processed.\n')
            assert base_dir is not None, msg
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

        if self.io.defaults.envi_write.force is False:  # otherwise just overwrites if it exists
            fname_list = self._check_processed(fname_list, base_dir_out,
                                               folder_name, name_append)

        self._execute_spec_mimic(fname_list, base_dir_out, folder_name,
                                 name_append, sensor, df_band_response,
                                 col_wl, center_wl)

[docs]    def spectral_resample(
            self, fname_list=None, base_dir=None, search_ext='bip',
            dir_level=0, base_dir_out=None, folder_name='spec_bin',
            name_append='spec-bin', bandwidth=None, bins_n=None,
            out_dtype=False, out_force=None, out_ext=False,
            out_interleave=False, out_byteorder=False):
        '''
        Batch processing tool to spectrally resample (a.k.a. "bin") multiple
        datacubes in the same way.

        Parameters:
            fname_list (``list``, optional): list of filenames to process; if
                left to ``None``, will look at ``base_dir``, ``search_ext``,
                and ``dir_level`` parameters for files to process (default:
                ``None``).
            base_dir (``str``, optional): directory path to search for files to
                spectrally resample; if ``fname_list`` is not ``None``,
                ``base_dir`` will be ignored (default: ``None``).
            search_ext (``str``): file format/extension to search for in all
                directories and subdirectories to determine which files to
                process; if ``fname_list`` is not ``None``, ``search_ext`` will
                be ignored (default: 'bip').
            dir_level (``int``): The number of directory levels to search; if
                ``None``, searches all directory levels (default: 0).
            base_dir_out (``str``): directory path to save all processed
                datacubes; if set to ``None``, a folder named according to the
                ``folder_name`` parameter is added to ``base_dir``
            folder_name (``str``): folder to add to ``base_dir_out`` to save
                all the processed datacubes (default: 'spec_bin').
            name_append (``str``): name to append to the filename (default:
                'spec-bin').
            bandwidth (``float`` or ``int``): The bandwidth of the bands
                after spectral resampling is complete (units should be
                consistent with that of the .hdr file). Setting ``bandwidth``
                to 10 will consolidate bands that fall within every 10 nm
                interval.
            bins_n (``int``): The number of bins (i.e., "bands") to achieve
                after spectral resampling is complete. Ignored if ``bandwidth``
                is not ``None``.
            out_XXX: Settings for saving the output files can be adjusted here
                if desired. They are stored in ``batch.io.defaults, and are
                therefore accessible at a high level. See
                ``hsio.set_io_defaults()`` for more information on each of the
                settings.

        Note:
            The following ``batch`` example builds on the API example results
            of the `batch.spatial_crop`_ function. Please complete the
            `batch.spatial_crop`_ example to be sure your directory
            (i.e., ``base_dir``) is populated with multiple hyperspectral
            datacubes. The following example will be using datacubes located in
            the following directory:
            ``F:\\nigo0024\Documents\hs_process_demo\spatial_crop``

        Example:
            Load and initialize the ``batch`` module, checking to be sure the
            directory exists.

            >>> import os
            >>> from hs_process import batch
            >>> base_dir = r'F:\\nigo0024\Documents\hs_process_demo\spatial_crop'
            >>> print(os.path.isdir(base_dir))
            True
            >>> hsbatch = batch(base_dir, search_ext='.bip', progress_bar=True)  # searches for all files in ``base_dir`` with a ".bip" file extension

            Use ``batch.spectral_resample`` to bin ("group") all spectral bands
            into 20 nm bandwidth bands (from ~2.3 nm bandwidth originally) on
            a per-pixel basis.

            >>> hsbatch.spectral_resample(
                base_dir=base_dir, folder_name='spec_bin',
                name_append='spec-bin-20', bandwidth=20)
            Processing 40 files. If existing files should be overwritten, be sure to set the ``out_force`` parameter.
            Processing file 39/40: 100%|██████████| 40/40 [00:00<00:00, 48.31it/s]
            ...

            Use ``seaborn`` to visualize the spectra of a single pixel in one
            of the processed images.

            >>> import seaborn as sns
            >>> fname = os.path.join(base_dir, 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-spatial-crop.bip')
            >>> hsbatch.io.read_cube(fname)
            >>> spy_mem = hsbatch.io.spyfile.open_memmap()  # datacube before resampling
            >>> meta_bands = list(hsbatch.io.tools.meta_bands.values())
            >>> fname = os.path.join(base_dir, 'spec_bin', 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-spec-bin-20.bip')
            >>> hsbatch.io.read_cube(fname)
            >>> spy_mem_bin = hsbatch.io.spyfile.open_memmap()  # datacube after resampling
            >>> meta_bands_bin = list(hsbatch.io.tools.meta_bands.values())
            >>> ax = sns.lineplot(x=meta_bands, y=spy_mem[26][29], label='Hyperspectral (Pika II)', linewidth=3)
            >>> ax = sns.lineplot(x=meta_bands_bin, y=spy_mem_bin[26][29], label='Spectral resample (20 nm)', marker='o', ms=6, ax=ax)
            >>> ax.set_xlabel('Wavelength (nm)', weight='bold')
            >>> ax.set_ylabel('Reflectance (%)', weight='bold')
            >>> ax.set_title(r'API Example: `batch.spectral_resample`', weight='bold')

            .. image:: img/batch/spectral_resample-20nm_plot.png

        .. _batch.spatial_crop: hs_process.batch.html#hs_process.batch.spatial_crop
        '''
        self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                                out_byteorder)
        if fname_list is None and base_dir is not None:
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        elif fname_list is None and base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate '
                   'which datacubes should be processed.\n')
            assert base_dir is not None, msg
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

        if self.io.defaults.envi_write.force is False:  # otherwise just overwrites if it exists
            fname_list = self._check_processed(fname_list, base_dir_out,
                                               folder_name, name_append)

        self._execute_spec_resample(fname_list, base_dir_out, folder_name,
                                    name_append, bandwidth, bins_n)

[docs]    def spectral_smooth(self, fname_list=None, base_dir=None, search_ext='bip',
                        dir_level=0, base_dir_out=None,
                        folder_name='spec_smooth', name_append='spec-smooth',
                        window_size=11, order=2, stats=False,
                        out_dtype=False, out_force=None, out_ext=False,
                        out_interleave=False, out_byteorder=False):
        '''
        Batch processing tool to spectrally smooth multiple datacubes in the
        same way.

        Parameters:
            fname_list (``list``, optional): list of filenames to process; if
                left to ``None``, will look at ``base_dir``, ``search_ext``, and
                ``dir_level`` parameters for files to process (default: ``None``).
            base_dir (``str``, optional): directory path to search for files to
                spectrally clip; if ``fname_list`` is not ``None``, ``base_dir`` will
                be ignored (default: ``None``).
            search_ext (``str``): file format/extension to search for in all
                directories and subdirectories to determine which files to
                process; if ``fname_list`` is not ``None``, ``search_ext`` will
                be ignored (default: 'bip').
            dir_level (``int``): The number of directory levels to search; if
                ``None``, searches all directory levels (default: 0).
            base_dir_out (``str``): directory path to save all processed
                datacubes; if set to ``None``, a folder named according to the
                ``folder_name`` parameter is added to ``base_dir``
            folder_name (``str``): folder to add to ``base_dir_out`` to save all
                the processed datacubes (default: 'spec-smooth').
            name_append (``str``): name to append to the filename (default:
                'spec-smooth').
            window_size (``int``): the length of the window; must be an odd
                integer number (default: 11).
            order (``int``): the order of the polynomial used in the filtering;
                must be less than ``window_size`` - 1 (default: 2).
            stats (``bool``): whether to compute some basic descriptive
                statistics (mean, st. dev., and coefficient of variation) of
                the smoothed data array (default: ``False``)
            out_XXX: Settings for saving the output files can be adjusted here
                if desired. They are stored in ``batch.io.defaults, and are
                therefore accessible at a high level. See
                ``hsio.set_io_defaults()`` for more information on each of the
                settings.

        Note:
            The following ``batch`` example builds on the API example results
            of the `batch.spatial_crop`_ function. Please complete the
            `batch.spatial_crop`_ example to be sure your directory
            (i.e., ``base_dir``) is populated with multiple hyperspectral
            datacubes. The following example will be using datacubes located in
            the following directory:
            ``F:\\nigo0024\Documents\hs_process_demo\spatial_crop``

        Example:
            Load and initialize the ``batch`` module, checking to be sure the
            directory exists.

            >>> import os
            >>> from hs_process import batch
            >>> base_dir = r'F:\\nigo0024\Documents\hs_process_demo\spatial_crop'
            >>> print(os.path.isdir(base_dir))
            True
            >>> hsbatch = batch(base_dir, search_ext='.bip')  # searches for all files in ``base_dir`` with a ".bip" file extension

            Use ``batch.spectral_smooth`` to perform a *Savitzky-Golay*
            smoothing operation on each image/pixel in ``base_dir``. The
            ``window_size`` and ``order`` can be adjusted to achieve desired
            smoothing results.

            >>> hsbatch.spectral_smooth(base_dir=base_dir, folder_name='spec_smooth',
                                        window_size=11, order=2)
            Processing 40 files. If this is not what is expected, please check if files have already undergone processing. If existing files should be overwritten, be sure to set the ``out_force`` parameter.
            Spectrally smoothing: F:\\nigo0024\Documents\hs_process_demo\spatial_crop\Wells_rep2_20180628_16h56m_pika_gige_7_1011-spatial-crop.bip
            Saving F:\\nigo0024\Documents\hs_process_demo\spatial_crop\spec_smooth\Wells_rep2_20180628_16h56m_pika_gige_7_1011-spec-smooth.bip
            Spectrally smoothing: F:\\nigo0024\Documents\hs_process_demo\spatial_crop\Wells_rep2_20180628_16h56m_pika_gige_7_1012-spatial-crop.bip
            Saving F:\\nigo0024\Documents\hs_process_demo\spatial_crop\spec_smooth\Wells_rep2_20180628_16h56m_pika_gige_7_1012-spec-smooth.bip
            ...

            Use ``seaborn`` to visualize the spectra of a single pixel in one
            of the processed images.

            >>> import seaborn as sns
            >>> fname = os.path.join(base_dir, 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-spatial-crop.bip')
            >>> hsbatch.io.read_cube(fname)
            >>> spy_mem = hsbatch.io.spyfile.open_memmap()  # datacube before smoothing
            >>> meta_bands = list(hsbatch.io.tools.meta_bands.values())
            >>> fname = os.path.join(base_dir, 'spec_smooth', 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-spec-smooth.bip')
            >>> hsbatch.io.read_cube(fname)
            >>> spy_mem_clip = hsbatch.io.spyfile.open_memmap()  # datacube after smoothing
            >>> meta_bands_clip = list(hsbatch.io.tools.meta_bands.values())
            >>> ax = sns.lineplot(x=meta_bands, y=spy_mem[26][29], label='Before spectral smoothing', linewidth=3)
            >>> ax = sns.lineplot(x=meta_bands_clip, y=spy_mem_clip[26][29], label='After spectral smoothing', ax=ax)
            >>> ax.set_xlabel('Wavelength (nm)', weight='bold')
            >>> ax.set_ylabel('Reflectance (%)', weight='bold')
            >>> ax.set_title(r'API Example: `batch.spectral_smooth`', weight='bold')

            .. image:: img/batch/spectral_smooth_plot.png

            Notice how the *"choppiness"* of the spectral curve is lessened
            after the smoothing operation. There are spectral regions that
            perhaps had a lower *signal:noise* ratio and did not do particularlly
            well at smoothing (i.e., < 410 nm, ~770 nm, and ~820 nm). It may be
            wise to perform ``batch.spectral_smooth`` *after*
            `batch.spectral_clip`_.

        .. _batch.spatial_crop: hs_process.batch.html#hs_process.batch.spatial_crop
        .. _batch.spectral_clip: hs_process.batch.html#hs_process.batch.spectral_clip
        '''
        self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                                out_byteorder)
        if fname_list is None and base_dir is not None:
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
        elif fname_list is None and base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate which '
                   'datacubes should be processed.\n')
            assert base_dir is not None, msg
            fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

        if self.io.defaults.envi_write.force is False:  # otherwise just overwrites if it exists
            fname_list = self._check_processed(fname_list, base_dir_out,
                                               folder_name, name_append)
        self._execute_spec_smooth(
                fname_list, base_dir_out, folder_name, name_append,
                window_size, order, stats)
        # if df_stats is not None:
        #     return df_stats


        # # Parallel threading
        # with ThreadPoolExecutor(max_workers=None) as executor:  # defaults to min(32, os.cpu_count() + 4)
        #     future_to_clip = {
        #         executor.submit(self._execute_spec_smooth_pp,
        #                         fname,
        #                         base_dir_out, folder_name, name_append,
        #                         window_size, order, stats): fname for fname in fname_list}

        #     df_stats = None
        #     if stats == True:
        #         for future in as_completed(future_to_clip):
        #             smooth = future_to_clip[future]
        #             try:
        #                 df_smooth_temp = future.result()
        #                 if df_stats is None:
        #                     df_stats = df_smooth_temp.copy()
        #                 else:
        #                     df_stats = df_stats.append(df_smooth_temp, ignore_index=True)
        #             except Exception as exc:
        #                 print('%r generated an exception: %s' % (smooth, exc))
        #             else:
        #                 print('%r page is %d bytes' % (smooth, len(data)))

        #         base_dir = os.path.dirname(fname_list[0])
        #         if base_dir_out is None:
        #             dir_out, name_append = self._save_file_setup(
        #                     base_dir, folder_name, name_append)
        #         else:
        #             dir_out, name_append = self._save_file_setup(
        #                     base_dir_out, folder_name, name_append)

        #         fname_stats = os.path.join(dir_out, name_append[1:] + '-stats.csv')
        #         if os.path.isfile(fname_stats) and self.io.defaults.envi_write.force is False:
        #             df_stats_in = pd.read_csv(fname_stats)
        #             df_smooth_stats = df_stats_in.append(df_stats)
        #         df_smooth_stats.to_csv(fname_stats)
        #         return df_smooth_stats