# -*- coding: utf-8 -*-
import geopandas as gpd
import multiprocessing as mp
import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
import sys
import time
from tqdm import tqdm
from matplotlib import pyplot as plt
import warnings
from hs_process.utilities import defaults
from hs_process.utilities import hsio
from hs_process.segment import segment
from hs_process.spec_mod import spec_mod
from hs_process.spatial_mod import spatial_mod
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed
[docs]class batch(object):
'''
Class for batch processing hyperspectral image data. Makes use of
`segment`_, `spatial_mod`_, and `spec_mod`_ to batch process many
datacubes in a given directory. Supports options to save full
datacubes, geotiff renders, as well as summary statistics and/or
reports for the various tools.
Note:
It may be a good idea to review and understand the `defaults`_,
`hsio`_, `hstools`_, `segment`_, `spatial_mod`_, and `spec_mod`_
classes prior to using the ``batch`` module.
.. _defaults: hs_process.defaults.html
.. _hsio: hs_process.hsio.html
.. _hstools: hs_process.hstools.html
.. _segment: hs_process.segment.html
.. _spatial_mod: hs_process.spatial_mod.html
.. _spec_mod: hs_process.spec_mod.html
'''
def __init__(self, base_dir=None, search_ext='.bip', dir_level=0,
             lock=None, progress_bar=False):
    '''
    Stores the batch settings and, when ``base_dir`` is given, builds the
    list of files to process via ``batch._recurs_dir()``.

    Parameters:
        base_dir (``str``, optional): directory path to search for files
            to process; if ``None``, no search is performed and
            ``batch.fname_list`` is left as ``None`` (default: ``None``).
        search_ext (``str``): file format/extension to search for when
            building ``batch.fname_list`` from ``base_dir`` (default:
            '.bip').
        dir_level (``int``): The number of directory levels to search; if
            ``None``, searches all directory levels (default: 0).
        lock (``multiprocessing.Lock``): Can be passed to ensure lock is in
            place when writing to a file during multiprocessing.
        progress_bar (``bool``): If ``True``, the batch loops wrap their
            iterables in a ``tqdm`` progress bar (default: ``False``).
    '''
    self.base_dir, self.search_ext, self.dir_level = (
        base_dir, search_ext, dir_level)
    self.lock, self.progress_bar = lock, progress_bar

    # The file list stays ``None`` unless a directory was supplied.
    self.fname_list = None
    if base_dir is not None:
        self.fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

    self.io = hsio()
    # Populated later by the batch methods (e.g., ``my_segment`` is set in
    # the band math loops).
    self.my_spectral_mod = self.my_spatial_mod = self.my_segment = None
def _try_spat_crop_col_key(self, key, df_row):
'''
Gets value of ``key`` (column name) from ``df_row``; returns
``None`` if there is a KeyError
This is tricky for crop_X and buf_X columns, because we must decipher
whether to get these values from the default pool or not. If we get a
KeyError, our first instinct is to gather the default, but we must
check the "inverse" first (the "inverse" of crop_e_pix is crop_e_m) to
avoid overwriting a value passed in df_row unintentionally. Therefore,
this function handles keys differently if "crop" or "buf" are part of
``key`` than if they are not part of ``key``
Adds ``key`` to batch.io.defaults.spat_crop_cols if it does not yet
exist, but then of course the ``value`` that is returned will be
``None``
'''
if key not in self.io.defaults.spat_crop_cols.keys():
print('Adding key "{0}" to defaults.spat_crop_cols dictionary'
''.format(key))
self.io.defaults.spat_crop_cols[key] = key
try:
value = df_row[self.io.defaults.spat_crop_cols[key]]
except KeyError: # try to retrieve a default value
# decide whehter to get default or not.. how?
# check the inverse to see if it is accesible
# try:
# value = self.io.defaults.crop_defaults[key]
# except KeyError:
# value = None
if 'crop' in key or 'buf' in key:
key_base = key[:key.find('_', key.rfind('_'))]
key_unit = key[key.find('_', key.rfind('_')):]
if key_unit == '_m':
key_unit_inv = '_pix'
elif key_unit == '_pix':
key_unit_inv = '_m'
try:
value_inv = df_row[self.io.defaults.spat_crop_cols[key_base+key_unit_inv]] # exists; set to NaN and carry on
value = None
except KeyError: # neither exist, gather default
try:
value = self.io.defaults.crop_defaults[key]
except KeyError:
value = None
else: # proceed as normal
try:
value = self.io.defaults.crop_defaults[key]
except KeyError:
value = None
# if key in ['crop_e_m', 'crop_n_m', 'crop_e_pix', 'crop_n_pix']:
# print('Key: {0} Value: {1}'.format(key, value))
return value
def _check_processed(self, fname_list, base_dir_out, folder_name,
name_append, append_extra=None, ext=None):
'''
Checks if any files in fname_list have already (presumably) undergone
processing. This is determined by checking if a file exists with a
particular name based on the filename in fname_list and naming
parameters (i.e,. ``folder_name`` and ``name_append``).
Parameters:
ext (``str``): e.g., '.spec'
'''
if append_extra is None:
append_extra = ''
fname_list_final = fname_list.copy()
for fname in fname_list:
if base_dir_out is None:
base_dir = os.path.split(fname)[0]
dir_out, name_append = self._save_file_setup(
base_dir, folder_name, name_append)
else:
dir_out, name_append = self._save_file_setup(
base_dir_out, folder_name, name_append)
name_print = self._get_name_print(fname)
if ext is None:
name_label = (name_print + name_append + append_extra + '.' +
self.io.defaults.envi_write.interleave)
else:
name_label = (name_print + name_append + append_extra + ext)
if os.path.isfile(os.path.join(dir_out, name_label)):
fname_list_final.remove(fname)
msg1 = ('There are no files to process. Please check if files have '
'already undergone processing. If existing files should be '
'overwritten, be sure to set the ``out_force`` parameter.\n')
msg2 = ('Processing {0} files. If existing files should be '
'overwritten, be sure to set the ``out_force`` parameter.\n'
''.format(len(fname_list_final)))
if not len(fname_list_final) > 0:
warnings.warn(msg1, UserWarning, stacklevel=0)
# else:
# print(msg2)
time.sleep(0.2) # when using progress bar, this keeps from splitting lines
return fname_list_final
def _crop_read_sheet(self, row):
'''
Reads the necessary information from the spreadsheet and saves it
to a dictionary
If this function causes an error, try checking
``batch.io.defaults.spat_crop_col`` - these should be adjusted
according to the default column names of the input (i.e.,
``fname_sheet``).
'''
crop_specs = {
'directory': self._try_spat_crop_col_key('directory', row),
'fname': self._try_spat_crop_col_key('fname', row),
'name_short': self._try_spat_crop_col_key('name_short', row),
'name_long': self._try_spat_crop_col_key('name_long', row),
'ext': self._try_spat_crop_col_key('ext', row),
'plot_id_ref': self._try_spat_crop_col_key('plot_id_ref', row),
'pix_e_ul': self._try_spat_crop_col_key('pix_e_ul', row),
'pix_n_ul': self._try_spat_crop_col_key('pix_n_ul', row),
'alley_size_e_m': self._try_spat_crop_col_key('alley_size_e_m', row),
'alley_size_n_m': self._try_spat_crop_col_key('alley_size_n_m', row),
'alley_size_e_pix': self._try_spat_crop_col_key('alley_size_e_pix', row),
'alley_size_n_pix': self._try_spat_crop_col_key('alley_size_n_pix', row),
'buf_e_m': self._try_spat_crop_col_key('buf_e_m', row),
'buf_n_m': self._try_spat_crop_col_key('buf_n_m', row),
'buf_e_pix': self._try_spat_crop_col_key('buf_e_pix', row),
'buf_n_pix': self._try_spat_crop_col_key('buf_n_pix', row),
'crop_e_m': self._try_spat_crop_col_key('crop_e_m', row),
'crop_n_m': self._try_spat_crop_col_key('crop_n_m', row),
'crop_e_pix': self._try_spat_crop_col_key('crop_e_pix', row),
'crop_n_pix': self._try_spat_crop_col_key('crop_n_pix', row),
'gdf_shft_e_pix': self._try_spat_crop_col_key('gdf_shft_e_pix', row),
'gdf_shft_n_pix': self._try_spat_crop_col_key('gdf_shft_n_pix', row),
'gdf_shft_e_m': self._try_spat_crop_col_key('gdf_shft_e_m', row),
'gdf_shft_n_m': self._try_spat_crop_col_key('gdf_shft_n_m', row),
'n_plots_x': self._try_spat_crop_col_key('n_plots_x', row),
'n_plots_y': self._try_spat_crop_col_key('n_plots_y', row),
'n_plots': self._try_spat_crop_col_key('n_plots', row)}
if crop_specs['fname'] is None:
try:
crop_specs['fname'] = (crop_specs['name_short'] +
crop_specs['name_long'] +
crop_specs['ext'])
except TypeError:
crop_specs['fname'] = None
if crop_specs['fname'] is not None:
base_name = os.path.basename(crop_specs['fname'])
if crop_specs['name_short'] is None:
crop_specs['name_short'] = base_name[
:base_name.find('-', base_name.rfind('_'))]
if crop_specs['name_long'] is None:
crop_specs['name_long'] = base_name[
base_name.find('-', base_name.rfind('_')):]
if crop_specs['ext'] is None:
crop_specs['ext'] = os.path.splitext(crop_specs['fname'])[1]
for col_name in row.index:
if col_name not in self.io.defaults.spat_crop_cols.keys():
crop_specs[col_name] = row[col_name]
if not pd.notnull(crop_specs['name_long']):
crop_specs['name_long'] = None
if not pd.notnull(crop_specs['plot_id_ref']):
crop_specs['plot_id_ref'] = None
if not pd.notnull(crop_specs['name_short']):
crop_specs['name_short'] = None
self.crop_specs = crop_specs
return crop_specs
def _pix_to_mapunit(self, crop_specs, spyfile=None):
'''
Looks over specifications of ``crop_specs``, and converts betweeen pixel
units and map units if one is populated and the other is ``None``
'''
cs = crop_specs.copy()
if spyfile is None:
spyfile = self.io.spyfile
spy_ps_e = float(spyfile.metadata['map info'][5])
spy_ps_n = float(spyfile.metadata['map info'][6])
# Crop size
# if cs['crop_e_pix'] is None and cs['crop_e_m'] is not None:
if pd.isnull(cs['crop_e_pix']) and pd.notnull(cs['crop_e_m']):
cs['crop_e_pix'] = int(cs['crop_e_m'] / spy_ps_e)
elif pd.notnull(cs['crop_e_pix']) and pd.isnull(cs['crop_e_m']):
cs['crop_e_m'] = cs['crop_e_pix'] * spy_ps_e
if pd.isnull(cs['crop_n_pix']) and pd.notnull(cs['crop_n_m']):
cs['crop_n_pix'] = int(cs['crop_n_m'] / spy_ps_n)
elif pd.notnull(cs['crop_n_pix']) and pd.isnull(cs['crop_n_m']):
cs['crop_n_m'] = cs['crop_n_pix'] * spy_ps_n
# Buffer
if pd.isnull(cs['buf_e_pix']) and pd.notnull(cs['buf_e_m']):
cs['buf_e_pix'] = int(cs['buf_e_m'] / spy_ps_e)
elif pd.notnull(cs['buf_e_pix']) and pd.isnull(cs['buf_e_m']):
cs['buf_e_m'] = cs['buf_e_pix'] * spy_ps_e
if pd.isnull(cs['buf_n_pix']) and pd.notnull(cs['buf_n_m']):
cs['buf_n_pix'] = int(cs['buf_n_m'] / spy_ps_e)
elif pd.notnull(cs['buf_n_pix']) and pd.isnull(cs['buf_n_m']):
cs['buf_n_m'] = cs['buf_n_pix'] * spy_ps_e
# Shift
if pd.isnull(cs['gdf_shft_e_pix']) and pd.notnull(cs['gdf_shft_e_m']):
cs['gdf_shft_e_pix'] = int(cs['gdf_shft_e_m'] / spy_ps_e)
elif pd.notnull(cs['gdf_shft_e_pix']) and pd.isnull(cs['gdf_shft_e_m']):
cs['gdf_shft_e_m'] = cs['gdf_shft_e_pix'] * spy_ps_e
if pd.isnull(cs['gdf_shft_n_pix']) and pd.notnull(cs['gdf_shft_n_m']):
cs['gdf_shft_n_pix'] = int(cs['gdf_shft_n_m'] / spy_ps_e)
elif pd.notnull(cs['gdf_shft_n_pix']) and pd.isnull(cs['gdf_shft_n_m']):
cs['gdf_shft_n_m'] = cs['gdf_shft_n_pix'] * spy_ps_e
# Alley size
if (pd.isnull(cs['alley_size_e_pix']) and
pd.notnull(cs['alley_size_e_m'])):
cs['alley_size_e_pix'] = int(cs['alley_size_e_m'] / spy_ps_e)
elif (pd.notnull(cs['alley_size_e_pix']) and
pd.isnull(cs['alley_size_e_m'])):
cs['alley_size_e_m'] = cs['alley_size_e_pix'] * spy_ps_e
if (pd.isnull(cs['alley_size_n_pix']) and
pd.notnull(cs['alley_size_n_m'])):
cs['alley_size_n_pix'] = int(cs['alley_size_n_m'] / spy_ps_n)
elif (pd.notnull(cs['alley_size_n_pix']) and
pd.isnull(cs['alley_size_n_m'])):
cs['alley_size_n_m'] = cs['alley_size_n_pix'] * spy_ps_n
self.crop_specs = cs
return cs
def _composite_band_setup(self, base_dir_out, fname, folder_name, name_append):
'''
'''
if base_dir_out is None:
base_dir = os.path.dirname(fname)
dir_out, name_append = self._save_file_setup(
base_dir, folder_name, name_append)
else:
dir_out, name_append = self._save_file_setup(
base_dir_out, folder_name, name_append)
name_print = self._get_name_print()
return dir_out, name_print, name_append
def _band_math_setup(self, base_dir_out, folder_name, fname, name_append,
method):
'''
'''
msg = ('``method`` must be one of either "ndi", "ratio", "derivative", '
'or "mcari2".\n')
assert method in ['ndi', 'ratio', 'derivative', 'mcari2'], msg
if base_dir_out is None:
base_dir = os.path.dirname(fname)
dir_out, name_append = self._save_file_setup(
base_dir, folder_name, name_append)
else:
dir_out, name_append = self._save_file_setup(
base_dir_out, folder_name, name_append)
name_print = self._get_name_print()
return dir_out, name_print, name_append
# if method == 'ndi':
# print('Calculating normalized difference index for: {0}'
# ''.format(name_print))
# elif method == 'ratio':
# print('Calculating simple ratio index for: {0}'
# ''.format(name_print))
# elif method == 'mcari2':
# print('Calculating MCARI2 index for: {0}'
# ''.format(name_print))
def _mask_stats_setup(self, mask_thresh, mask_percentile, mask_side):
'''
Parse thesholds and percentiles to dynamically set column names for
masked df_stats
'''
if mask_thresh is not None:
if not isinstance(mask_thresh, list):
mask_thresh = [mask_thresh]
mask_thresh_print = '-'.join([str(x) for x in mask_thresh])
if mask_percentile is not None:
if not isinstance(mask_percentile, list):
mask_percentile = [mask_percentile]
mask_pctl_print = '-'.join([str(x) for x in mask_percentile])
if mask_side is not None:
if not isinstance(mask_side, list):
mask_side = [mask_side]
mask_side_print = '-'.join([str(x) for x in mask_side])
if mask_thresh is not None and mask_percentile is not None:
type_mask = ('mask-{0}-thresh-{1}-pctl-{2}'.format(
mask_side_print, mask_thresh_print, mask_pctl_print))
elif mask_thresh is not None and mask_percentile is None:
type_mask = ('mask-{0}-thresh-{1}'.format(
mask_side_print, mask_thresh_print))
elif mask_thresh is None and mask_percentile is not None:
type_mask = ('mask-{0}-pctl-{1}'.format(
mask_side_print, mask_pctl_print))
columns = ['fname', 'plot_id', type_mask + '-count',
type_mask + '-mean', type_mask + '-stdev',
type_mask + '-median']
df_stats = pd.DataFrame(columns=columns)
return df_stats, type_mask
def _mask_single_stats(self, fname, array_bm, metadata_bm,
mask_thresh, mask_percentile, mask_side, df_stats):
'''
Creates the bandmath mask and summarizes the band math values after
masking unwanted pixels. Returns the single masked bandmath array and
the stats dataframe with the new image data appended as a row
'''
array_mask, metadata_bm = self.io.tools.mask_array(
array_bm, metadata_bm, thresh=mask_thresh,
percentile=mask_percentile, side=mask_side)
# array_mask, metadata_bm = hsbatch.io.tools.mask_array(
# array_bandmath1, metadata_bandmath1, thresh=mask_thresh,
# percentile=mask_percentile, side=mask_side)
# stat_mask_count = np.count_nonzero(~np.isnan(array_mask))
# all nan values should be masked from mask_array() function
stat_mask_count = array_mask.count()
stat_mask_mean = array_mask.mean()
stat_mask_std = array_mask.std()
stat_mask_med = np.ma.median(array_mask)
# stat_mask_mean = np.nanmean(array_mask)
# stat_mask_std = np.nanstd(array_mask)
# stat_mask_med = np.nanmedian(array_mask)
data = [fname, self.io.name_plot, stat_mask_count, stat_mask_mean,
stat_mask_std, stat_mask_med]
df_stats_temp = pd.DataFrame(data=[data], columns=df_stats.columns)
df_stats = df_stats.append(df_stats_temp, ignore_index=True)
return array_mask, df_stats
def _mask_two_step(self, mask_dir, mask_side, mask_thresh,
mask_percentile, fname, df_stats1, df_stats2,
name_label):
'''
Performs a two-step masking process. The masked masked
bandmath arrays and stats for each step are returned.
'''
msg1 = ('Either ``mask_thresh`` or ``mask_percentile`` is a '
'list, but ``mask_dir`` is not a list. If trying to '
'perform a "two-step" masking process, please be sure '
'to pass a list with length of two for both '
'``mask_dir`` and ``mask_side``, as well as either '
'for ``mask_thresh`` or ``mask_percentile``.\n'
'``mask_dir``: {0}\n``mask_side``: {1}'
''.format(mask_dir, mask_side))
msg2 = ('Either ``mask_thresh`` or ``mask_percentile`` is a '
'list, but ``mask_side`` is not a list. If trying to '
'perform a "two-step" masking process, please be sure '
'to pass a list with length of two for both '
'``mask_dir`` and ``mask_side``, as well as either '
'for ``mask_thresh`` or ``mask_percentile``.\n'
'``mask_dir``: {0}\n``mask_side``: {1}'
''.format(mask_dir, mask_side))
assert isinstance(mask_dir, list), msg1
assert isinstance(mask_side, list), msg2
array_bandmath1, metadata_bandmath1 = self._get_array_similar(
mask_dir[0])
array_bandmath2, metadata_bandmath2 = self._get_array_similar(
mask_dir[1])
if isinstance(mask_thresh, list):
array_mask1, df_stats1 = self._mask_single_stats(
fname, array_bandmath1, metadata_bandmath1,
mask_thresh[0], None, mask_side[0], df_stats1)
array_mask2, df_stats2 = self._mask_single_stats(
fname, array_bandmath2, metadata_bandmath2,
mask_thresh[1], None, mask_side[1], df_stats2)
elif isinstance(mask_percentile, list):
array_mask1, df_stats1 = self._mask_single_stats(
fname, array_bandmath1, metadata_bandmath1, None,
mask_percentile[0], mask_side[0], df_stats1)
array_mask2, df_stats2 = self._mask_single_stats(
fname, array_bandmath2, metadata_bandmath2, None,
mask_percentile[1], mask_side[1], df_stats2)
return array_mask1, array_mask2, df_stats1, df_stats2
def _execute_mask(self, fname_list, mask_dir, base_dir_out, folder_name,
name_append, write_datacube, write_spec, write_geotiff,
mask_thresh, mask_percentile, mask_side):
'''
Actually creates the mask to keep the main function a bit cleaner
'''
if mask_side == 'outside': # thresh/pctl will be a list, so take care of this first
df_stats1, type_mask1 = self._mask_stats_setup(
mask_thresh, mask_percentile, mask_side)
df_stats2 = None
type_mask2 = None
# if mask_side is not "outside" and thresh is list, then it's a 2-step
elif isinstance(mask_thresh, list):
if not isinstance(mask_side, list):
maskside = [mask_side, mask_side] # ensure that mask_side is two parts as well
df_stats1, type_mask1 = self._mask_stats_setup(mask_thresh[0], None, mask_side[0])
df_stats2, type_mask2 = self._mask_stats_setup(mask_thresh[1], None, mask_side[1])
elif isinstance(mask_percentile, list):
if not isinstance(mask_side, list):
maskside = [mask_side, mask_side] # ensure that mask_side is two parts as well
df_stats1, type_mask1 = self._mask_stats_setup(None, mask_percentile[0], mask_side[0])
df_stats2, type_mask2 = self._mask_stats_setup(None, mask_percentile[1], mask_side[1])
else:
df_stats1, type_mask1 = self._mask_stats_setup(mask_thresh, mask_percentile, mask_side)
df_stats2 = None
type_mask2 = None
fname_list_p = tqdm(fname_list) if self.progress_bar is True else fname_list
for idx, fname in enumerate(fname_list_p):
if self.progress_bar is True:
fname_list_p.set_description('Processing file {0}/{1}'.format(idx, len(fname_list)))
self.io.read_cube(fname)
metadata = self.io.spyfile.metadata.copy()
metadata_geotiff = self.io.spyfile.metadata.copy()
base_dir = os.path.dirname(fname)
if base_dir_out is None:
dir_out, name_append = self._save_file_setup(
base_dir, folder_name, name_append)
else:
dir_out, name_append = self._save_file_setup(
base_dir_out, folder_name, name_append)
name_print = self._get_name_print()
name_label = (name_print + name_append + '.' +
self.io.defaults.envi_write.interleave)
if self._file_exists_check(
dir_out, name_label, write_datacube=write_datacube,
write_spec=write_spec, write_geotiff=write_geotiff) is True:
continue
# array = self.io.spyfile.load()
array = self.io.spyfile.open_memmap()
if mask_dir is None:
mask_dir = os.path.join(self.io.base_dir, 'band_math')
if df_stats2 is not None:
array_mask1, array_mask2, df_stats1, df_stats2 =\
self._mask_two_step(mask_dir, mask_side, mask_thresh,
mask_percentile, fname, df_stats1,
df_stats2, name_label)
array_mask = np.logical_or(array_mask1.mask,
array_mask2.mask)
else: # things are much simpler
array_bandmath1, metadata_bandmath1 = self._get_array_similar(
mask_dir)
array_mask, df_stats1 = self._mask_single_stats(
fname, array_bandmath1, metadata_bandmath1, mask_thresh,
mask_percentile, mask_side, df_stats1)
array_mask = array_mask.mask
spec_mean, spec_std, datacube_masked = self.io.tools.mean_datacube(
array, array_mask)
self.spec_mean = spec_mean
self.spec_std = spec_std
hist_str = (" -> hs_process.batch.segment_create_mask[<"
"label: 'mask_thresh?' value:{0}; "
"label: 'mask_percentile?' value:{1}; "
"label: 'mask_side?' value:{2}>]"
"".format(mask_thresh, mask_percentile, mask_side))
metadata['history'] += hist_str
metadata_geotiff['history'] += hist_str
if write_datacube is True:
self._write_datacube(dir_out, name_label, datacube_masked,
metadata)
if write_spec is True:
name_label_spec = (os.path.splitext(name_label)[0] +
'-mean.spec')
self._write_spec(dir_out, name_label_spec, spec_mean, spec_std,
metadata)
self.array_mask = array_mask
if write_geotiff is True:
self._write_geotiff(array_mask, fname, dir_out, name_label,
metadata_geotiff, self.io.tools)
if len(df_stats1) > 0:
fname_stats1 = os.path.join(dir_out, type_mask1 + '.csv')
df_stats1.to_csv(fname_stats1, index=False)
if df_stats2 is not None:
if len(df_stats2) > 0:
# fname_csv2 = 'mask-stats2.csv'
fname_stats2 = os.path.join(dir_out, type_mask2 + '.csv')
df_stats2.to_csv(fname_stats2, index=False)
# # should we make an option to save a mean spectra as well?
# # Yes - we aren't required to save intermediate results and do
# # another batch process..? we get everything done in one shot -
# # after all, why do we want to do band math if we aren't also
# # calculating the average of the area (unless cropping hasn't
# # been performed yet)?
# # No - Keep it simpler and keep batch functions more specific in
# # their capabilities (e.g., batch.band_math, batch.mask_array,
# # batch.veg_spectra)
#
def _write_stats(self, dir_out, df_stats, fname_csv='stats.csv'):
'''
Writes df_stats to <dir_out>, ensuring lock is in place if it exists to
work as expected with parallel processing.
'''
fname_stats = os.path.join(dir_out, fname_csv)
if self.lock is not None:
with self.lock:
if os.path.isfile(fname_stats):
df_stats_in = pd.read_csv(fname_stats)
df_stats = df_stats_in.append(df_stats)
df_stats.to_csv(fname_stats, index=False)
else:
if os.path.isfile(fname_stats):
df_stats_in = pd.read_csv(fname_stats)
df_stats = df_stats_in.append(df_stats)
df_stats.to_csv(fname_stats, index=False)
def _execute_composite_band(self, fname_list, base_dir_out, folder_name,
                            name_append, write_geotiff, wl1, b1,
                            list_range, plot_out):
    '''
    Actually executes the composite band to keep the main function a bit
    cleaner.

    For every file in ``fname_list`` the datacube is read, the composite
    band is calculated via ``segment.composite_band()``, and the result is
    written as a datacube (and optionally as a geotiff and/or a histogram
    plot). Summary statistics (count, mean, standard deviation, median,
    and percentiles, all ignoring nan values) are collected for every
    processed file and written via ``batch._write_stats()`` to the output
    directory of the last file.

    Parameters:
        fname_list (``list``): datacube filenames to process.
        base_dir_out (``str``), folder_name (``str``), name_append
            (``str``): passed to ``batch._composite_band_setup()``.
        write_geotiff (``bool``): whether to also write a geotiff.
        wl1: wavelength(s) of the composite band; the integer mean is
            used in the output names.
        b1: band(s) passed to ``segment.composite_band()``.
        list_range: passed to ``segment.composite_band()``.
        plot_out (``bool``): whether to save a histogram of the composite
            band as a .png file.
    '''
    type_bm = '-comp-{0}'.format(int(np.mean(wl1)))
    columns = ['fname', 'plot_id', 'count', 'mean', 'std_dev', 'median',
               'pctl_10th', 'pctl_25th', 'pctl_50th', 'pctl_75th',
               'pctl_90th', 'pctl_95th']
    stat_rows = []  # one row per processed file

    n_files = len(fname_list)
    fname_list_p = tqdm(fname_list) if self.progress_bar is True else fname_list
    for idx, fname in enumerate(fname_list_p):
        if self.progress_bar is True:
            fname_list_p.set_description(
                'Processing file {0}/{1}'.format(idx + 1, n_files))
        self.io.read_cube(fname)
        dir_out, name_print, name_append = self._composite_band_setup(
            base_dir_out, fname, folder_name, name_append)
        self.my_segment = segment(self.io.spyfile)

        name_label = (name_print + name_append + type_bm + '.{0}'
                      ''.format(self.io.defaults.envi_write.interleave))
        if self._file_exists_check(
                dir_out, name_label, write_datacube=True,
                write_geotiff=write_geotiff, write_plot=plot_out) is True:
            continue

        array_b1, metadata = self.my_segment.composite_band(
            wl1=wl1, b1=b1, list_range=list_range, print_out=False)

        stat_pctls = np.nanpercentile(array_b1, [10, 25, 50, 75, 90, 95])
        stat_rows.append(
            [fname, self.io.name_plot,
             np.count_nonzero(~np.isnan(array_b1)), np.nanmean(array_b1),
             np.nanstd(array_b1), np.nanmedian(array_b1)] +
            [stat_pctls[i] for i in range(6)])

        if plot_out is True:
            fname_fig = os.path.join(dir_out,
                                     os.path.splitext(name_label)[0] +
                                     '.png')
            # Label the axis with the composite type (the array itself has
            # no ``upper()`` method).
            self.io.tools.plot_histogram(
                array_b1, fname_fig=fname_fig, title=name_print,
                xlabel=type_bm.lstrip('-').upper(), percentile=90, bins=50,
                fontsize=14, color='#444444')

        metadata['label'] = name_label
        self._write_datacube(dir_out, name_label, array_b1, metadata)
        if write_geotiff is True:
            self._write_geotiff(array_b1, fname, dir_out, name_label,
                                metadata, self.my_segment.tools)

    if len(stat_rows) > 0:
        df_stats = pd.DataFrame(data=stat_rows, columns=columns)
        self._write_stats(dir_out, df_stats, fname_csv=name_append[1:] + '-stats.csv')
def _execute_band_math(self, fname_list, base_dir_out, folder_name,
                       name_append, write_geotiff, method, wl1, wl2, wl3, b1, b2,
                       b3, list_range, plot_out):
    '''
    Actually executes the band math to keep the main function a bit
    cleaner.

    For every file in ``fname_list`` the datacube is read, the band math
    is calculated via ``segment.band_math_<method>()``, and the result is
    written as a datacube (and optionally as a geotiff and/or a histogram
    plot). Summary statistics (count, mean, standard deviation, median,
    and percentiles, all ignoring nan values) are collected for every
    processed file and written via ``batch._write_stats()`` to the output
    directory of the last file.

    Output names contain ``<method>-<wl1>-<wl2>`` for "ndi" and "ratio",
    and ``<method>-<wl1>-<wl2>-<wl3>`` for "derivative" and "mcari2",
    where each wavelength is the integer mean of the passed value(s).

    Parameters:
        fname_list (``list``): datacube filenames to process.
        base_dir_out (``str``), folder_name (``str``), name_append
            (``str``): passed to ``batch._band_math_setup()``.
        write_geotiff (``bool``): whether to also write a geotiff.
        method (``str``): one of "ndi", "ratio", "derivative", or
            "mcari2" (validated by ``batch._band_math_setup()``).
        wl1, wl2, wl3: wavelength(s) for each band math parameter;
            ``wl3`` is only used by "derivative" and "mcari2".
        b1, b2, b3: band(s) for each band math parameter; ``b3`` is only
            used by "derivative" and "mcari2".
        list_range: passed to the band math function.
        plot_out (``bool``): whether to save a histogram of the band math
            result as a .png file.
    '''
    columns = ['fname', 'plot_id', 'count', 'mean', 'std_dev', 'median',
               'pctl_10th', 'pctl_25th', 'pctl_50th', 'pctl_75th',
               'pctl_90th', 'pctl_95th']
    stat_rows = []  # one row per processed file

    n_files = len(fname_list)
    fname_list_p = tqdm(fname_list) if self.progress_bar is True else fname_list
    for idx, fname in enumerate(fname_list_p):
        if self.progress_bar is True:
            fname_list_p.set_description(
                'Processing file {0}/{1}'.format(idx + 1, n_files))
        self.io.read_cube(fname)
        dir_out, name_print, name_append = self._band_math_setup(
            base_dir_out, folder_name, fname, name_append, method)
        self.my_segment = segment(self.io.spyfile)

        # "derivative" and "mcari2" take a third wavelength/band.
        if method in ('derivative', 'mcari2'):
            wavelengths = (wl1, wl2, wl3)
            band_kwargs = dict(wl1=wl1, wl2=wl2, wl3=wl3,
                               b1=b1, b2=b2, b3=b3)
        else:
            wavelengths = (wl1, wl2)
            band_kwargs = dict(wl1=wl1, wl2=wl2, b1=b1, b2=b2)
        type_bm = '-'.join(
            [method] + [str(int(np.mean(wl))) for wl in wavelengths])
        name_label = (name_print + name_append + '-' + type_bm + '.' +
                      '{0}'.format(self.io.defaults.envi_write.interleave))
        if self._file_exists_check(
                dir_out, name_label, write_datacube=True,
                write_geotiff=write_geotiff,
                write_plot=plot_out) is True:
            continue

        band_math = getattr(self.my_segment, 'band_math_' + method)
        array_bm, metadata = band_math(
            list_range=list_range, print_out=False, **band_kwargs)

        stat_pctls = np.nanpercentile(array_bm, [10, 25, 50, 75, 90, 95])
        stat_rows.append(
            [fname, self.io.name_plot,
             np.count_nonzero(~np.isnan(array_bm)), np.nanmean(array_bm),
             np.nanstd(array_bm), np.nanmedian(array_bm)] +
            [stat_pctls[i] for i in range(6)])

        if plot_out is True:
            fname_fig = os.path.join(dir_out,
                                     os.path.splitext(name_label)[0] +
                                     '.png')
            self.io.tools.plot_histogram(
                array_bm, fname_fig=fname_fig, title=name_print,
                xlabel=type_bm.upper(), percentile=90, bins=50,
                fontsize=14, color='#444444')

        metadata['label'] = name_label
        self._write_datacube(dir_out, name_label, array_bm, metadata)
        if write_geotiff is True:
            self._write_geotiff(array_bm, fname, dir_out, name_label,
                                metadata, self.my_segment.tools)

    if len(stat_rows) > 0:
        df_stats = pd.DataFrame(data=stat_rows, columns=columns)
        self._write_stats(dir_out, df_stats, fname_csv=name_append[1:] + '-stats.csv')
def _get_ndvi_simple(self, df_class_spec, n_classes, plot_out=True):
'''
Find kmeans class with lowest NDVI, which represents the soil class
'''
nir_b = self.io.tools.get_band(760)
re_b = self.io.tools.get_band(715)
red_b = self.io.tools.get_band(681)
green_b = self.io.tools.get_band(555)
nir = df_class_spec.iloc[nir_b]
re = df_class_spec.iloc[re_b]
red = df_class_spec.iloc[red_b]
green = df_class_spec.iloc[green_b]
df_ndvi = (nir-red)/(nir+red)
class_soil = df_ndvi[df_ndvi == df_ndvi.min()].index[0]
class_veg = df_ndvi[df_ndvi == df_ndvi.max()].index[0]
if plot_out is True:
df_class_spec['wavelength'] = self.io.tools.meta_bands.values()
fig, ax = plt.subplots()
sns.lineplot(data=df_class_spec, ax=ax)
legend = ax.legend()
legend.set_title('K-means classes')
legend.texts[class_soil].set_text('Soil')
legend.texts[class_veg].set_text('Vegetation')
return class_soil, class_veg
def _crop_check_input(self, fname_sheet, fname_list, method):
'''
Checks that either `fname_sheet` or `fname_list` were passed (and not
both)
'''
if fname_sheet is not None:
if isinstance(fname_sheet, pd.DataFrame) and pd.isnull(fname_list):
df_plots = fname_sheet
elif os.path.splitext(fname_sheet)[-1] == '.csv' and pd.isnull(fname_list):
df_plots = pd.read_csv(fname_sheet)
elif fname_list is not None:
msg2 = ('Both ``fname_sheet`` and ``fname_list`` were passed. '
'``fname_list`` (perhaps from ``base_dir``) will be '
'ignored.\n')
print(msg2)
if isinstance(fname_sheet, pd.DataFrame):
df_plots = fname_sheet
elif os.path.splitext(fname_sheet)[-1] == '.csv':
df_plots = pd.read_csv(fname_sheet)
return df_plots
elif pd.isnull(fname_sheet) and all(pd.isnull(fname_list)):
msg1 = ('Neither ``fname_sheet`` nor ``fname_list`` were passed. '
'Please pass one or the other (not both) and run '
'``batch.spatial_crop`` again.\n')
raise TypeError(msg1)
else: # fname_list was passed and df_plots will be figured out later
msg3 = ('``method`` is "single", but ``fname_list`` was passed '
'instead of ``fname_sheet``.\n\nIf performing '
'``crop_single``, please pass ``fname_sheet``.\n\nIf '
'performing ``crop_many_gdf``, please pass ``fname_list`` '
'(perhaps via ``base_dir``).\n')
assert method in ['many_grid', 'many_gdf'], msg3
return
def _file_exists_check(self, dir_out, name_label,
write_datacube=False, write_spec=False,
write_geotiff=False, write_plot=False):
'''
Checks if all files to be created exist already; if so, returns True;
if not, returns False.
'''
if self.io.defaults.envi_write.force is True:
return False
write_dict = {'write_datacube': write_datacube,
'write_spec': write_spec,
'write_geotiff': write_geotiff,
'write_plot': write_plot}
ext_dict = {'write_datacube': '.bip',
'write_spec': '-mean.spec',
'write_geotiff': '.tif',
'write_plot': '.png'}
msg = ('Skipping file - it appears as if this image has already '
'been processed. Overwrite files by passing out_force=True\n'
'Filename (short): {0}'.format(name_label))
for key in write_dict.keys():
if write_dict[key] is True:
fname = os.path.splitext(
os.path.join(dir_out, name_label))[0] + ext_dict[key]
if not os.path.isfile(fname): # if we need the file and it doesn't exist, we're done checking
return False
# if we get here without already exiting, all files should exist
print(msg)
return True
def _crop_loop(self, df_plots, gdf, base_dir_out, folder_name,
               name_append, write_geotiff):
    '''
    Runs one ``spatial_mod.crop_single()`` per row of ``df_plots``.

    Every row must carry all the settings needed for a single crop; this
    function is not meant for dataframes that describe a "crop many"
    operation, so narrow the information down to one plot per row before
    calling it.

    For each row the datacube is read, a ``spatial_mod`` instance is
    created, and the output folder/name are resolved. Rows whose output
    files already exist (per ``_file_exists_check``) are skipped; the
    remaining ones are cropped and written as a datacube and, if
    ``write_geotiff`` is ``True``, as a geotiff.
    '''
    show_progress = self.progress_bar is True
    row_iter = df_plots.iterrows()
    if show_progress:
        row_iter = tqdm(row_iter, total=df_plots.shape[0])

    # settings forwarded verbatim from the cropping sheet to crop_single()
    crop_keys = ('pix_e_ul', 'pix_n_ul', 'crop_e_pix', 'crop_n_pix',
                 'buf_e_pix', 'buf_n_pix', 'gdf_shft_e_pix',
                 'gdf_shft_n_pix')

    for idx, row in row_iter:
        if show_progress:
            row_iter.set_description(
                'Processing file {0}/{1}'.format(idx, len(df_plots)))
        cs = self._crop_read_sheet(row)
        fname = os.path.join(cs['directory'], cs['fname'])
        name_long = cs['name_long']  # ``None`` if it was never set
        plot_id_ref = cs['plot_id_ref']
        name_short = cs['name_short']
        self.io.read_cube(fname + '.hdr', name_long=name_long,
                          name_plot=plot_id_ref, name_short=name_short)
        self.my_spatial_mod = spatial_mod(
            self.io.spyfile, gdf, base_dir=self.io.base_dir,
            name_long=self.io.name_long, name_short=self.io.name_short)
        self.my_spatial_mod.defaults = self.io.defaults

        # output goes next to the input unless a base directory was given
        parent_dir = cs['directory'] if base_dir_out is None else base_dir_out
        dir_out, name_append = self._save_file_setup(
            parent_dir, folder_name, name_append)
        name_print = self._get_name_print()
        name_label = self._get_name_label(row, name_print, name_append)
        already_processed = self._file_exists_check(
            dir_out, name_label, write_datacube=True,
            write_geotiff=write_geotiff)
        if already_processed is True:
            continue

        cs = self._pix_to_mapunit(cs)
        self.cs = cs
        crop_kwargs = {key: cs[key] for key in crop_keys}
        array_crop, metadata = self.my_spatial_mod.crop_single(
            plot_id_ref=plot_id_ref, gdf=gdf, **crop_kwargs)
        # rebuilt because ``cs`` was replaced by ``_pix_to_mapunit``
        fname = os.path.join(cs['directory'], cs['fname'])
        self._write_datacube(dir_out, name_label, array_crop, metadata)
        if write_geotiff is True:
            self._write_geotiff(array_crop, fname, dir_out, name_label,
                                metadata, self.my_spatial_mod.tools,
                                show_img=False)
def _append_cropping_details(self, df_plots_many, row):
'''
Appends all "row" columns to df_plots_many so they carry through
'''
for col in row.keys():
if pd.notnull(row[col]) and col not in df_plots_many.columns:
df_plots_many[col] = row[col]
return df_plots_many
def _crop_many_read_row(self, row, gdf, method):
    '''
    Reads one row of a cropping sheet that describes how to crop a single
    image many times, loads that image, and returns the per-plot dataframe
    produced by the requested "crop many" method.

    Parameters:
        row: a row of the cropping sheet (passed to ``_crop_read_sheet``).
        gdf: passed straight to ``spatial_mod``.
        method (``str``): "many_gdf" or "many_grid".

    Returns:
        The dataframe returned by ``_many_gdf``/``_many_grid`` with the
        non-null columns of ``row`` appended.

    Raises:
        ValueError: if ``method`` is neither "many_gdf" nor "many_grid"
        (raised after the datacube has been read).
    '''
    cs = self._crop_read_sheet(row)  # this function creates cs['fname']
    fname_in = os.path.join(cs['directory'], cs['fname'])
    print('Filename: {0}'.format(fname_in))
    name_long = cs['name_long']  # ``None`` if it was never set
    plot_id_ref = cs['plot_id_ref']
    name_short = cs['name_short']
    self.io.read_cube(fname_in + '.hdr', name_long=name_long,
                      name_plot=plot_id_ref, name_short=name_short)
    self.my_spatial_mod = spatial_mod(
        self.io.spyfile, gdf, base_dir=self.io.base_dir,
        name_long=self.io.name_long, name_short=self.io.name_short)
    self.my_spatial_mod.defaults = self.io.defaults

    if method not in ('many_gdf', 'many_grid'):
        raise ValueError(
            '``method`` must be either "many_gdf" or "many_grid".\n'
            'Method: {0}'.format(method))
    build_plots = self._many_gdf if method == 'many_gdf' else self._many_grid
    return self._append_cropping_details(build_plots(cs), row)
def _many_grid(self, cs):
'''Wrapper to get consice access to ``spatial_mod.crop_many_grid()'''
df_plots = self.my_spatial_mod.crop_many_grid(
cs['plot_id_ref'], pix_e_ul=cs['pix_e_ul'], pix_n_ul=cs['pix_n_ul'],
crop_e_m=cs['crop_e_m'], crop_n_m=cs['crop_n_m'],
alley_size_n_m=cs['alley_size_n_m'], buf_e_m=cs['buf_e_m'],
buf_n_m=cs['buf_n_m'], n_plots_x=cs['n_plots_x'],
n_plots_y=cs['n_plots_y'])
return df_plots
def _many_gdf(self, cs):
'''
Wrapper to get consice access to ``spatial_mod.crop_many_gdf();
``my_spatial_mod`` already has access to ``spyfile`` and ``gdf``, so no
need to pass them here.
If the buffer settings are None, but there are default settings for
them, they are passed here
'''
if cs['plot_id_ref'] is None:
cs['plot_id_ref'] = self.io.defaults.crop_defaults.plot_id_ref
df_plots = self.my_spatial_mod.crop_many_gdf(
plot_id_ref=cs['plot_id_ref'], pix_e_ul=cs['pix_e_ul'],
pix_n_ul=cs['pix_n_ul'],
crop_e_m=cs['crop_e_m'], crop_n_m=cs['crop_n_m'],
crop_e_pix=cs['crop_e_pix'], crop_n_pix=cs['crop_n_pix'],
buf_e_m=cs['buf_e_m'], buf_n_m=cs['buf_n_m'],
buf_e_pix=cs['buf_e_pix'], buf_n_pix=cs['buf_n_pix'],
gdf_shft_e_m=cs['gdf_shft_e_m'], gdf_shft_n_m=cs['gdf_shft_n_m'],
gdf_shft_e_pix=cs['gdf_shft_e_pix'],
gdf_shft_n_pix=cs['gdf_shft_n_pix'],
n_plots=cs['n_plots'])
return df_plots
def _crop_check_files(self, df_plots):
'''
If file already exists and out_force is False, removes that file from
the df_plots
'''
df_plots.reset_index(inplace=True)
df_plots_out = df_plots.copy()
for idx, row in df_plots.iterrows():
fname = os.path.join(row['directory'], row['name_short'] +
row['name_long'] + row['ext'])
if os.path.isfile(fname):
df_plots_out.drop(idx, inplace=True)
df_plots_out.reset_index(inplace=True)
return df_plots_out
def _execute_crop(self, fname_sheet, fname_list, base_dir_out, folder_name,
                  name_append, write_geotiff, method, gdf):
    '''
    Carries out the spatial crop so the public entry point stays compact.

    Either ``fname_sheet`` or ``fname_list`` should be ``None``. The
    cropping sheet is resolved by ``_crop_check_input``; depending on
    ``method`` the rows are cropped directly ("single") or first expanded
    into one row per plot ("many_gdf"/"many_grid") and then cropped via
    ``_crop_loop``.

    Raises:
        ValueError: if the combination of ``method`` and the resolved
        cropping sheet is not one of the supported cases.
    '''
    df_plots = self._crop_check_input(fname_sheet, fname_list, method)
    if df_plots is not None and not df_plots.empty:
        # NOTE(review): ``df_plots['date']`` is a column, not a ``str``, so
        # this ``isinstance`` test looks like it can never pass - confirm
        # whether the dates were meant to be parsed here.
        if 'date' in df_plots.columns and isinstance(df_plots['date'], str):
            df_plots['date'] = pd.to_datetime(df_plots['date'])

    # arguments shared by every ``_crop_loop`` call below
    loop_args = (gdf, base_dir_out, folder_name, name_append, write_geotiff)
    have_sheet = isinstance(df_plots, pd.DataFrame)

    if method == 'single':
        self._crop_loop(df_plots, *loop_args)
        return

    if method == 'many_gdf' and have_sheet:
        # The user passed a dataframe: each row is an image that gets
        # ``crop_many_gdf`` applied (with whatever parameters its columns
        # hold) to build the per-plot dataframe handed to ``_crop_loop``.
        for _, row in df_plots.iterrows():
            print('Computing information to spatially crop via '
                  '``spatial_mod.crop_many_gdf``:')
            df_plots_many = self._crop_many_read_row(row, gdf, method)
            self.df_plots_many = df_plots_many
            self._crop_loop(df_plots_many, *loop_args)
        return

    if method == 'many_gdf' and df_plots is None:
        print('Because ``fname_list`` was passed instead of '
              '``fname_sheet``, there is not a way to infer the study '
              'name and date. Therefore, "study" and "date" will be '
              'omitted from the output file name. If you would like '
              'output file names to include "study" and "date", please '
              'pass ``fname_sheet`` with "study" and "date" columns.\n')
        for fname_in in fname_list:
            self.io.read_cube(fname_in)
            self.my_spatial_mod = spatial_mod(
                self.io.spyfile, gdf, base_dir=self.io.base_dir,
                name_long=self.io.name_long,
                name_short=self.io.name_short)
            # defaults set ahead of time carry through, e.g.,
            # batch.io.defaults.crop_defaults.n_plots = 40 limits to 40 plots
            self.my_spatial_mod.defaults = self.io.defaults
            df_plots_many = self.my_spatial_mod.crop_many_gdf()
            self._crop_loop(df_plots_many, *loop_args)
        return

    if method == 'many_grid' and have_sheet:
        for _, row in df_plots.iterrows():
            print('\nComputing information to spatially crop via '
                  '``spatial_mod.crop_many_grid``:')
            df_plots_many = self._crop_many_read_row(row, gdf, method)
            self._crop_loop(df_plots_many, *loop_args)
        return

    raise ValueError(
        'Either ``method`` or ``df_plots`` are not defined '
        'correctly. If using "many_grid" method, please be sure '
        '``df_plots`` is being populated correcty\n\n``method``: '
        '{0}'.format(method))
def _write_datacube(self, dir_out, name_label, array, metadata):
'''
Writes a datacube to file using ``hsio.write_cube()``
'''
metadata['label'] = name_label
hdr_file = os.path.join(dir_out, name_label + '.hdr')
self.io.write_cube(hdr_file, array, metadata=metadata,
dtype=self.io.defaults.envi_write.dtype,
force=self.io.defaults.envi_write.force,
ext=self.io.defaults.envi_write.ext,
interleave=self.io.defaults.envi_write.interleave,
byteorder=self.io.defaults.envi_write.byteorder)
def _write_geotiff(self, array, fname, dir_out, name_label, metadata,
tools, show_img=False):
metadata['label'] = name_label
msg = ('Projection and Geotransform information are required for '
'writing the geotiff. This comes from the input filename, '
'so please be sure the correct filename is passed to '
'``fname``.\n')
assert fname is not None and os.path.isfile(fname), msg
fname_tif = os.path.join(dir_out,
os.path.splitext(name_label)[0] + '.tif')
img_ds = self.io._read_envi_gdal(fname_in=fname)
projection_out = img_ds.GetProjection()
# geotransform_out = img_ds.GetGeotransform()
img_ds = None # I only want to use GDAL when I have to..
map_set = metadata['map info']
ul_x_utm = tools.get_meta_set(map_set, 3)
ul_y_utm = tools.get_meta_set(map_set, 4)
size_x_m = tools.get_meta_set(map_set, 5)
size_y_m = tools.get_meta_set(map_set, 6)
# Note the last pixel size must be negative to begin at upper left
geotransform_out = [ul_x_utm, size_x_m, 0.0, ul_y_utm, 0.0,
-size_y_m]
self.io.write_tif(fname_tif, spyfile=array,
projection_out=projection_out,
geotransform_out=geotransform_out,
show_img=show_img)
def _write_spec(self, dir_out, name_label, spec_mean, spec_std,
metadata, folder_name=None):
'''
folder_name added to support an extra level on foler if desired. If
left to ``None``, spec will be written to dir_out, but if a str is
passed, spec will be written to os.path.join(dir_out, str).
'''
metadata['label'] = name_label
if folder_name: # else just use dir_out as is
dir_out, _ = self._save_file_setup(dir_out, folder_name, None)
hdr_file = os.path.join(dir_out, name_label + '.hdr')
self.io.write_spec(hdr_file, spec_mean, spec_std,
dtype=self.io.defaults.envi_write.dtype,
force=self.io.defaults.envi_write.force,
ext=self.io.defaults.envi_write.ext,
interleave=self.io.defaults.envi_write.interleave,
byteorder=self.io.defaults.envi_write.byteorder,
metadata=metadata)
def _execute_spec_clip(self, fname_list, base_dir_out, folder_name,
                       name_append, wl_bands):
    '''
    Spectrally clips every datacube in ``fname_list`` and writes the
    result to disk, keeping the public entry point compact.

    Each file is read, clipped with ``spec_mod.spectral_clip(wl_bands)``
    and written as ``<name_short><name_append>.<interleave>`` into
    ``folder_name`` under ``base_dir_out`` (or under the input file's own
    directory when ``base_dir_out`` is ``None``).
    '''
    show_progress = self.progress_bar is True
    files = tqdm(fname_list) if show_progress else fname_list
    for idx, fname in enumerate(files):
        if show_progress:
            files.set_description(
                'Processing file {0}/{1}'.format(idx, len(fname_list)))
        # options for io.read_cube():
        # name_long, name_plot, name_short, individual_plot, overwrite
        self.io.read_cube(fname)
        self.my_spectral_mod = spec_mod(self.io.spyfile)
        base_dir = os.path.dirname(fname)
        parent_dir = base_dir if base_dir_out is None else base_dir_out
        dir_out, name_append = self._save_file_setup(
            parent_dir, folder_name, name_append)
        name_print = self._get_name_print()
        array_clip, metadata = self.my_spectral_mod.spectral_clip(
            wl_bands=wl_bands)
        envi_write = self.io.defaults.envi_write
        name_label = ''.join(
            (name_print, name_append, '.', envi_write.interleave))
        metadata['label'] = name_label
        self.io.write_cube(
            os.path.join(dir_out, name_label + '.hdr'), array_clip,
            dtype=envi_write.dtype,
            force=envi_write.force,
            ext=envi_write.ext,
            interleave=envi_write.interleave,
            byteorder=envi_write.byteorder,
            metadata=metadata)
def _execute_spec_clip_pp(self, fname, base_dir_out, folder_name, name_append, wl_bands):
    '''
    Spectrally clips the single datacube ``fname`` and writes the result
    to disk (single-file counterpart of ``_execute_spec_clip``).

    The output is named ``<name_short><name_append>.<interleave>`` and is
    placed in ``folder_name`` under ``base_dir_out`` (or under the input
    file's own directory when ``base_dir_out`` is ``None``).
    '''
    print('\nSpectrally clipping: {0}'.format(fname))
    # options for io.read_cube():
    # name_long, name_plot, name_short, individual_plot, overwrite
    self.io.read_cube(fname)
    self.my_spectral_mod = spec_mod(self.io.spyfile)
    base_dir = os.path.dirname(fname)
    parent_dir = base_dir if base_dir_out is None else base_dir_out
    dir_out, name_append = self._save_file_setup(
        parent_dir, folder_name, name_append)
    name_print = self._get_name_print()
    array_clip, metadata = self.my_spectral_mod.spectral_clip(
        wl_bands=wl_bands)
    envi_write = self.io.defaults.envi_write
    name_label = ''.join(
        (name_print, name_append, '.', envi_write.interleave))
    metadata['label'] = name_label
    self.io.write_cube(
        os.path.join(dir_out, name_label + '.hdr'), array_clip,
        dtype=envi_write.dtype,
        force=envi_write.force,
        ext=envi_write.ext,
        interleave=envi_write.interleave,
        byteorder=envi_write.byteorder,
        metadata=metadata)
def _execute_spec_combine(self, fname_list, base_dir_out):
'''
Actually executes the spectra combine to keep the main function a bit
cleaner
'''
df_specs = None
if base_dir_out is None:
base_dir_out = os.path.dirname(fname_list[0])
pix_n = 0
for fname in fname_list:
self.io.read_spec(fname)
spy_mem = self.io.spyfile_spec.open_memmap()
pix_n += (np.count_nonzero(~np.isnan(spy_mem)) /
self.io.spyfile_spec.nbands)
print('Combining datacubes/spectra into a single mean spectra.\n'
'Number of input datacubes/spectra: {0}\nTotal number of '
'pixels: {1}'
''.format(len(fname_list), int(pix_n)))
for fname in fname_list:
self.io.read_spec(fname)
array = self.io.spyfile_spec.load()
if len(array.shape) == 3:
pixels = array.reshape((array.shape[0]*array.shape[1]),
array.shape[2])
else:
pixels = array.reshape((array.shape[0]), array.shape[2])
if df_specs is None:
df_specs = pd.DataFrame(pixels, dtype=float)
else:
df_temp = pd.DataFrame(pixels, dtype=float)
df_specs = df_specs.append(df_temp, ignore_index=True)
self.df_mean = df_specs.mean()
self.df_mean = self.df_mean.rename('mean')
self.df_std = df_specs.std()
self.df_std = self.df_std.rename('std')
df_cv = self.df_mean / self.df_std
df_cv = df_cv.rename('cv')
hdr_file = os.path.join(base_dir_out, 'spec_mean_spy.spec.hdr')
self.io.write_spec(hdr_file, self.df_mean, self.df_std,
dtype=self.io.defaults.envi_write.dtype,
force=self.io.defaults.envi_write.force,
ext=self.io.defaults.envi_write.ext,
interleave=self.io.defaults.envi_write.interleave,
byteorder=self.io.defaults.envi_write.byteorder,
metadata=self.io.spyfile_spec.metadata)
def _execute_spec_mimic(self, fname_list, base_dir_out, folder_name,
                        name_append, sensor, df_band_response, col_wl,
                        center_wl):
    '''
    Runs ``spec_mod.spectral_mimic()`` on every datacube in ``fname_list``
    and writes each result to disk, keeping the public entry point
    compact.

    ``sensor``, ``df_band_response``, ``col_wl`` and ``center_wl`` are
    forwarded unchanged. Each output is named
    ``<name_short><name_append>.<interleave>`` and is placed in
    ``folder_name`` under ``base_dir_out`` (or under the input file's own
    directory when ``base_dir_out`` is ``None``).
    '''
    show_progress = self.progress_bar is True
    files = tqdm(fname_list) if show_progress else fname_list
    for idx, fname in enumerate(files):
        if show_progress:
            files.set_description('Processing file {0}/{1}'
                                  ''.format(idx, len(fname_list)))
        self.io.read_cube(fname)
        self.my_spectral_mod = spec_mod(self.io.spyfile)
        base_dir = os.path.dirname(fname)
        parent_dir = base_dir if base_dir_out is None else base_dir_out
        dir_out, name_append = self._save_file_setup(
            parent_dir, folder_name, name_append)
        name_print = self._get_name_print()
        array_mimic, metadata = self.my_spectral_mod.spectral_mimic(
            sensor=sensor, df_band_response=df_band_response,
            col_wl=col_wl, center_wl=center_wl)
        envi_write = self.io.defaults.envi_write
        name_label = ''.join(
            (name_print, name_append, '.', envi_write.interleave))
        metadata['label'] = name_label
        self.io.write_cube(
            os.path.join(dir_out, name_label + '.hdr'), array_mimic,
            dtype=envi_write.dtype,
            force=envi_write.force,
            ext=envi_write.ext,
            interleave=envi_write.interleave,
            byteorder=envi_write.byteorder,
            metadata=metadata)
def _execute_spec_resample(self, fname_list, base_dir_out, folder_name,
                           name_append, bandwidth, bins_n):
    '''
    Runs ``spec_mod.spectral_resample()`` on every datacube in
    ``fname_list`` and writes each result to disk, keeping the public
    entry point compact.

    ``bandwidth`` and ``bins_n`` are forwarded unchanged. Each output is
    named ``<name_short><name_append>.<interleave>`` and is placed in
    ``folder_name`` under ``base_dir_out`` (or under the input file's own
    directory when ``base_dir_out`` is ``None``).
    '''
    show_progress = self.progress_bar is True
    files = tqdm(fname_list) if show_progress else fname_list
    for idx, fname in enumerate(files):
        if show_progress:
            files.set_description('Processing file {0}/{1}'
                                  ''.format(idx, len(fname_list)))
        self.io.read_cube(fname)
        self.my_spectral_mod = spec_mod(self.io.spyfile)
        base_dir = os.path.dirname(fname)
        parent_dir = base_dir if base_dir_out is None else base_dir_out
        dir_out, name_append = self._save_file_setup(
            parent_dir, folder_name, name_append)
        name_print = self._get_name_print()
        array_bin, metadata = self.my_spectral_mod.spectral_resample(
            bandwidth=bandwidth, bins_n=bins_n)
        envi_write = self.io.defaults.envi_write
        name_label = ''.join(
            (name_print, name_append, '.', envi_write.interleave))
        metadata['label'] = name_label
        self.io.write_cube(
            os.path.join(dir_out, name_label + '.hdr'), array_bin,
            dtype=envi_write.dtype,
            force=envi_write.force,
            ext=envi_write.ext,
            interleave=envi_write.interleave,
            byteorder=envi_write.byteorder,
            metadata=metadata)
def _execute_spec_smooth(self, fname_list, base_dir_out, folder_name,
                         name_append, window_size, order, stats):
    '''
    Spectrally smooths every datacube in ``fname_list`` and writes each
    result to disk, keeping the public entry point compact.

    Parameters:
        fname_list (``list``): datacube filenames to process.
        base_dir_out (``str`` or ``None``): parent output directory; if
            ``None``, each file's own directory is used.
        folder_name (``str`` or ``None``): folder added under the parent
            output directory.
        name_append (``str`` or ``None``): text appended to the output
            file names.
        window_size, order: forwarded to ``spec_mod.spectral_smooth()``.
        stats (``bool``): if ``True``, the overall mean, standard
            deviation and coefficient of variation (std/mean, NaNs
            ignored) of each smoothed cube are collected and written via
            ``_write_stats`` to the output directory of the last file
            processed.
    '''
    stats_columns = ['fname', 'mean', 'std', 'cv']
    # rows are gathered in a plain list and turned into a DataFrame once;
    # ``DataFrame.append`` is no longer available in pandas 2.0+
    stats_rows = []
    dir_out = None  # stays ``None`` when ``fname_list`` is empty

    show_progress = self.progress_bar is True
    files = tqdm(fname_list) if show_progress else fname_list
    for idx, fname in enumerate(files):
        if show_progress:
            files.set_description(
                'Processing file {0}/{1}'.format(idx, len(fname_list)))
        self.io.read_cube(fname)
        self.my_spectral_mod = spec_mod(self.io.spyfile)
        base_dir = os.path.dirname(fname)
        parent_dir = base_dir if base_dir_out is None else base_dir_out
        dir_out, name_append = self._save_file_setup(
            parent_dir, folder_name, name_append)
        name_print = self._get_name_print()
        array_smooth, metadata = self.my_spectral_mod.spectral_smooth(
            window_size=window_size, order=order)
        envi_write = self.io.defaults.envi_write
        name_label = (name_print + name_append + '.' +
                      envi_write.interleave)
        metadata['label'] = name_label
        hdr_file = os.path.join(dir_out, name_label + '.hdr')
        self.io.write_cube(hdr_file, array_smooth,
                           dtype=envi_write.dtype,
                           force=envi_write.force,
                           ext=envi_write.ext,
                           interleave=envi_write.interleave,
                           byteorder=envi_write.byteorder,
                           metadata=metadata)
        if stats is True:
            mean = np.nanmean(array_smooth)
            std = np.nanstd(array_smooth)
            stats_rows.append([fname, mean, std, std / mean])

    # nothing was processed if ``dir_out`` is still ``None``, so there is
    # neither a table to write nor a directory to write it to
    if stats is True and dir_out is not None:
        df_smooth_stats = pd.DataFrame(stats_rows, columns=stats_columns)
        # ``name_append[1:]`` drops the leading "-" added by
        # ``_save_file_setup``
        self._write_stats(dir_out, df_smooth_stats,
                          fname_csv=name_append[1:] + '-stats.csv')
def _execute_spec_smooth_pp(self, fname, base_dir_out, folder_name,
                            name_append, window_size, order, stats):
    '''
    Spectrally smooths the single datacube ``fname`` and writes the result
    to disk (single-file counterpart of ``_execute_spec_smooth``).

    Parameters:
        fname (``str``): datacube filename to process.
        base_dir_out (``str`` or ``None``): parent output directory; if
            ``None``, the directory of ``fname`` is used.
        folder_name (``str`` or ``None``): folder added under the parent
            output directory.
        name_append (``str`` or ``None``): text appended to the output
            file name.
        window_size, order: forwarded to ``spec_mod.spectral_smooth()``.
        stats (``bool``): whether to compute summary statistics.

    Returns:
        ``pandas.DataFrame`` or ``None``: when ``stats`` is ``True``, a
        one-row dataframe with columns "fname", "mean", "std" and "cv"
        (std/mean, NaNs ignored); otherwise ``None``.
    '''
    print('\nSpectrally smoothing: {0}'.format(fname))
    self.io.read_cube(fname)
    self.my_spectral_mod = spec_mod(self.io.spyfile)
    base_dir = os.path.dirname(fname)
    parent_dir = base_dir if base_dir_out is None else base_dir_out
    dir_out, name_append = self._save_file_setup(
        parent_dir, folder_name, name_append)
    name_print = self._get_name_print()
    array_smooth, metadata = self.my_spectral_mod.spectral_smooth(
        window_size=window_size, order=order)
    envi_write = self.io.defaults.envi_write
    name_label = (name_print + name_append + '.' + envi_write.interleave)
    metadata['label'] = name_label
    hdr_file = os.path.join(dir_out, name_label + '.hdr')
    self.io.write_cube(hdr_file, array_smooth,
                       dtype=envi_write.dtype,
                       force=envi_write.force,
                       ext=envi_write.ext,
                       interleave=envi_write.interleave,
                       byteorder=envi_write.byteorder,
                       metadata=metadata)
    if stats is not True:
        # no statistics were requested, so there is no table to return
        return None
    mean = np.nanmean(array_smooth)
    std = np.nanstd(array_smooth)
    # built directly; ``DataFrame.append`` is no longer available in
    # pandas 2.0+
    return pd.DataFrame([[fname, mean, std, std / mean]],
                        columns=['fname', 'mean', 'std', 'cv'])
def _get_fname_similar(self, name_to_match, base_dir, search_ext='bip',
level=0):
'''
Gets a similar filename from another directory
'''
fname_list = self._recurs_dir(base_dir, search_ext=search_ext,
level=level)
fname_similar = []
for fname in fname_list:
f = os.path.split(fname)[-1]
fname_short = f[:f.find('-')]
# if name_to_match in fname:
if name_to_match == fname_short:
fname_similar.append(fname)
msg1 = ('No files found with a similar name to "{0}". Please be '
'sure the images are created before continuing (e.g., did '
'you perform band math yet?)\n\nbase_dir: {1}'
''.format(name_to_match, base_dir))
msg2 = ('Multiple files found with a similar name to {0}. Please '
'delete files that are not relevant to continue.\n\nbase_dir: '
'{1}'.format(name_to_match, base_dir))
assert len(fname_similar) > 0, msg1
assert len(fname_similar) == 1, msg2
return fname_similar[0]
def _get_array_similar(self, dir_search):
    '''
    Loads the array (and metadata) of the file in ``dir_search`` whose
    short name matches the currently loaded datacube, i.e.,
    ``self.io.name_short``. A datacube must therefore already be loaded
    (``self.io.spyfile`` must not be ``None``).

    Parameters:
        dir_search: directory to search

    Returns:
        ``tuple``: the loaded array and the metadata of the matching file.

    Raises:
        AssertionError: if no datacube is loaded.
        IOError: if ``dir_search`` is not an existing directory.
    '''
    assert self.io.spyfile is not None, (
        'Please load a SpyFile prior to using this function')
    if not os.path.isdir(dir_search):
        raise IOError(
            'The passed directory does not exist; please pass a valid '
            'directory path.\nDirectory: {0}'.format(dir_search))
    fname_similar = self._get_fname_similar(
        self.io.name_short, dir_search,
        search_ext=self.io.defaults.envi_write.interleave, level=0)
    # a separate reader is used so the loaded datacube is left untouched
    io_mask = hsio()
    io_mask.read_cube(os.path.join(dir_search, fname_similar))
    spyfile_similar = io_mask.spyfile
    array = spyfile_similar.load()
    return array, spyfile_similar.metadata
def _get_class_mask(self, row, filter_cols, n_classes=1):
'''
Finds the class with the lowest NDVI in ``row`` and returns the class ID
to be used to dictate which pixels get masked
Parameters:
n_classes (``int``): number of classes to mask; if 1, then will mask
the minimum ndvi; if more than 1, all classes (default: 1)
'''
row_ndvi = row[filter_cols].astype(float)
row_ndvi = row_ndvi.dropna()
print(row_ndvi)
print(n_classes)
if len(row_ndvi) == n_classes:
n_classes -= 1
row_ndvi_small = row_ndvi.nsmallest(n=n_classes)
class_name = row_ndvi_small.index.values.tolist()
class_mask = []
for name in class_name:
class_int = int(re.search(r'\d+', name).group())
class_mask.append(class_int)
return class_mask
def _recurs_dir(self, base_dir, search_ext='.csv', level=None):
'''
Searches all folders and subfolders recursively within <base_dir>
for filetypes of <search_exp>.
Returns sorted <outFiles>, a list of full path strings of each result.
Parameters:
base_dir: directory path that should include files to be returned
search_ext: file format/extension to search for in all directories
and subdirectories
level: how many levels to search; if None, searches all levels
Returns:
out_files: include the full pathname, filename, and ext of all
files that have ``search_exp`` in their name.
'''
if level is None:
level = 1
else:
level -= 1
d_str = os.listdir(base_dir)
out_files = []
for item in d_str:
full_path = os.path.join(base_dir, item)
if not os.path.isdir(full_path) and item.endswith(search_ext):
out_files.append(full_path)
elif os.path.isdir(full_path) and level >= 0:
new_dir = full_path # If dir, then search in that
out_files_temp = self._recurs_dir(new_dir, search_ext)
if out_files_temp: # if list is not empty
out_files.extend(out_files_temp) # add items
return sorted(out_files)
def _save_file_setup(self, base_dir_out, folder_name, name_append):
'''
Basic setup items when saving manipulated image files to disk
Parameters:
base_dir_out (``str``): Parent directory that all processed datacubes
will be saved.
folder_name (``str`` or ``None``): Folder to add to
``base_dir_out`` to save all the processed datacubes.
name_append (``str``): name to append to the filename.
'''
# if base_dir_out is None:
# base_dir_out = os.path.join(self.base_dir, folder_name)
if pd.isnull(folder_name):
folder_name = ''
dir_out = os.path.join(base_dir_out, folder_name)
if not os.path.isdir(dir_out):
os.mkdir(dir_out)
if name_append is None:
name_append = ''
else:
if name_append[0] != '-':
name_append = '-' + str(name_append)
return dir_out, name_append
def _get_name_label(self, row, name_print, name_append):
'''
Gets the name label for the datacube. By doing this before the
processing operation, we can check to see if the datacube has
already been processed and pass over if desired.
'''
if 'study' in row.index:
if row['study'] is not None:
name_study = 'study_' + str(row['study'] + '_')
else:
name_study = ''
if 'date' in row.index:
if row['date'] is not None:
if isinstance(row['date'], str):
row['date'] = pd.to_datetime(row['date'])
name_date = ('date_' + str(row['date'].year).zfill(4) +
str(row['date'].month).zfill(2) +
str(row['date'].day).zfill(2) + '_')
else:
name_date = ''
if row['plot_id_ref'] is not None:
name_plot = 'plot_' + str(row['plot_id_ref'])
else:
name_plot = ''
if ((len(name_study) >= 1) and (len(name_date) >= 1) and
(len(name_plot) >= 1)): # then remove the name_print variable
name_label = (name_study + name_date + name_plot + name_append +
'.' + self.io.defaults.envi_write.interleave)
else:
name_label1 = (name_print + '_' + name_study + name_date +
name_plot)
if name_label1[-1] == '_':
name_label1 = name_label1[:-1]
name_label = (name_label1 + name_append + '.' +
self.io.defaults.envi_write.interleave)
return name_label
def _get_name_print(self, fname_in=None):
'''
'''
name_print = self.io.name_short
if name_print is None and fname_in is not None:
base_name = os.path.basename(fname_in)
name_print = base_name[:base_name.find('-', base_name.rfind('_'))]
msg = ('Could not get a name for input datacube.\n')
assert name_print is not None, msg
return name_print
def _read_spectra_from_file(self, fname, columns):
'''
Reads a single spectra from file
'''
self.io.read_spec(fname + '.hdr')
meta_bands = self.io.tools.meta_bands
array = self.io.spyfile_spec.load()
data = list(np.reshape(array, (array.shape[2])) * 100)
data.insert(0, self.io.name_plot)
data.insert(0, os.path.basename(fname))
df_spec_file = pd.DataFrame(data=[data], columns=columns)
return df_spec_file
def _print_progress(self, iteration, total, prefix='', suffix='',
                    decimals=1, bar_length=100):
    """
    Call in a loop to create a terminal progress bar.

    The bar is written to ``sys.stdout`` and begins with a carriage return
    so that successive calls overwrite the same terminal line. A newline is
    written once ``iteration`` equals ``total``.

    Parameters:
        iteration (``int``): current iteration.
        total (``int``): total iterations; if 0, the bar is drawn as
            complete rather than dividing by zero.
        prefix (``str``, optional): text printed before the bar.
        suffix (``str``, optional): text printed after the percentage.
        decimals (``int``, optional): positive number of decimals in the
            percent complete (default: 1).
        bar_length (``int``, optional): character length of the bar
            (default: 100).
    """
    if total:
        percent = 100 * (iteration / float(total))
        filled_length = int(round(bar_length * iteration / float(total)))
    else:
        # Nothing to process: report a full bar instead of raising
        # ZeroDivisionError
        percent = 100.0
        filled_length = bar_length
    percents = '{0:.{1}f}'.format(percent, decimals)
    bar = f'{"█" * filled_length}{"-" * (bar_length - filled_length)}'
    sys.stdout.write(f'\r{prefix} |{bar}| {percents}% {suffix}')
    if iteration == total:
        sys.stdout.write('\n')
    sys.stdout.flush()
def cube_to_spectra(self, fname_list=None, base_dir=None, search_ext='bip',
                    dir_level=0, base_dir_out=None,
                    folder_name='cube_to_spec',
                    name_append='cube-to-spec',
                    write_geotiff=True, out_dtype=False, out_force=None,
                    out_ext=False, out_interleave=False,
                    out_byteorder=False):
    r'''
    Calculates the mean and standard deviation for each cube in
    ``fname_list`` and writes the result to a ".spec" file.

    Parameters:
        fname_list (``list``, optional): list of filenames to process; if
            left to ``None``, will look at ``base_dir``, ``search_ext``, and
            ``dir_level`` parameters for files to process (default: ``None``).
        base_dir (``str``, optional): directory path to search for files to
            process; if ``fname_list`` is not ``None``, ``base_dir`` will
            be ignored. If both are ``None``, the ``base_dir`` stored on the
            ``batch`` object is used (default: ``None``).
        search_ext (``str``): file format/extension to search for in all
            directories and subdirectories to determine which files to
            process; if ``fname_list`` is not ``None``, ``search_ext`` will
            be ignored (default: 'bip').
        dir_level (``int``): The number of directory levels to search; if
            ``None``, searches all directory levels (default: 0).
        base_dir_out (``str``): directory path to save all processed
            spectra; if set to ``None``, a folder named according to the
            ``folder_name`` parameter is added to the directory of each
            input datacube (default: ``None``).
        folder_name (``str``): folder to add to ``base_dir_out`` to save all
            the processed datacubes (default: 'cube_to_spec').
        name_append (``str``): name to append to the filename (default:
            'cube-to-spec').
        write_geotiff (``bool``): whether to also save a geotiff for each
            datacube in the same output directory as the ".spec" file
            (default: ``True``).
        out_XXX: Settings for saving the output files can be adjusted here
            if desired. They are stored in ``batch.io.defaults``, and are
            therefore accessible at a high level. See
            ``hsio.set_io_defaults()`` for more information on each of the
            settings.

    Raises:
        AssertionError: if ``fname_list`` and ``base_dir`` are both ``None``
            and no ``base_dir`` is stored on the ``batch`` object.

    Note:
        The following ``batch`` example builds on the API example results
        of the `spatial_mod.crop_many_gdf`_ function. Please complete the
        `spatial_mod.crop_many_gdf`_ example to be sure your directory
        (i.e., ``base_dir``) is populated with multiple hyperspectral
        datacubes. The following example will be using datacubes located in
        the following directory:
        ``F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf``

    Example:
        Load and initialize the ``batch`` module, checking to be sure the
        directory exists.

        >>> import os
        >>> from hs_process import batch
        >>> data_dir = r'F:\nigo0024\Documents\hs_process_demo'
        >>> base_dir = os.path.join(data_dir, 'spatial_mod', 'crop_many_gdf')
        >>> print(os.path.isdir(base_dir))
        True
        >>> hsbatch = batch(base_dir, search_ext='.bip', progress_bar=True)  # searches for all files in ``base_dir`` with a ".bip" file extension

        Use ``batch.cube_to_spectra`` to calculate the *mean* and *standard
        deviation* across all pixels for each of the datacubes in
        ``base_dir``.

        >>> hsbatch.cube_to_spectra(base_dir=base_dir, write_geotiff=False, out_force=True)
        Processing file 40/40: 100%|██████████| 40/40 [00:03<00:00, 13.28it/s]

        Use ``seaborn`` to visualize the spectra of plots 1011, 1012, and
        1013. Notice how ``hsbatch.io.name_plot`` is utilized to retrieve
        the plot ID, and how the *"history"* tag is referenced from the
        metadata to determine the number of pixels whose reflectance was
        averaged to create the mean spectra. Also remember that pixels
        across the original input image likely represent a combination of
        soil, vegetation, and shadow.

        >>> import seaborn as sns
        >>> import re
        >>> fname_list = [os.path.join(base_dir, 'cube_to_spec', 'Wells_rep2_20180628_16h56m_pika_gige_7_plot_1011-cube-to-spec-mean.spec'),
                          os.path.join(base_dir, 'cube_to_spec', 'Wells_rep2_20180628_16h56m_pika_gige_7_plot_1012-cube-to-spec-mean.spec'),
                          os.path.join(base_dir, 'cube_to_spec', 'Wells_rep2_20180628_16h56m_pika_gige_7_plot_1013-cube-to-spec-mean.spec')]
        >>> ax = None
        >>> for fname in fname_list:
        >>>     hsbatch.io.read_spec(fname)
        >>>     meta_bands = list(hsbatch.io.tools.meta_bands.values())
        >>>     data = hsbatch.io.spyfile_spec.load().flatten() * 100
        >>>     hist = hsbatch.io.spyfile_spec.metadata['history']
        >>>     pix_n = re.search('<pixel number: (.*)>', hist).group(1)
        >>>     if ax is None:
        >>>         ax = sns.lineplot(x=meta_bands, y=data, label='Plot '+hsbatch.io.name_plot+' (n='+pix_n+')')
        >>>     else:
        >>>         ax = sns.lineplot(x=meta_bands, y=data, label='Plot '+hsbatch.io.name_plot+' (n='+pix_n+')', ax=ax)
        >>> ax.set_xlabel('Wavelength (nm)', weight='bold')
        >>> ax.set_ylabel('Reflectance (%)', weight='bold')
        >>> ax.set_title(r'API Example: `batch.cube_to_spectra`', weight='bold')

        .. image:: img/batch/cube_to_spectra.png

    .. _spatial_mod.crop_many_gdf: hs_process.spatial_mod.html#hs_process.spatial_mod.crop_many_gdf
    '''
    self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                            out_byteorder)
    if fname_list is None:
        if base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate '
                   'which datacubes should be processed.\n')
            assert base_dir is not None, msg
        fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

    append_extra = '-mean'
    if self.io.defaults.envi_write.force is False:  # otherwise just overwrites if it exists
        fname_list = self._check_processed(fname_list, base_dir_out,
                                           folder_name, name_append,
                                           append_extra, ext='.spec')

    show_progress = self.progress_bar is True
    fname_list_p = tqdm(fname_list) if show_progress else fname_list
    n_files = len(fname_list)
    # Count from 1 so the final description reads "n/n" instead of "n-1/n"
    for file_n, fname in enumerate(fname_list_p, start=1):
        if show_progress:
            fname_list_p.set_description(
                'Processing file {0}/{1}'.format(file_n, n_files))
        self.io.read_cube(fname)
        # Without ``base_dir_out``, output goes next to the input datacube
        if base_dir_out is None:
            dir_root = os.path.dirname(fname)
        else:
            dir_root = base_dir_out
        dir_out, name_append = self._save_file_setup(
            dir_root, folder_name, name_append)
        name_print = self._get_name_print()
        name_label = (name_print + name_append + append_extra + '.' +
                      self.io.defaults.envi_write.interleave)
        if self._file_exists_check(
                dir_out, name_label, write_geotiff=write_geotiff,
                write_spec=True) is True:
            continue

        spec_mean, spec_std, array = self.io.tools.mean_datacube(
            self.io.spyfile)
        metadata = self.io.spyfile.metadata.copy()
        # because this is specialized, we should make our own history str
        n_pix = self.io.spyfile.nrows * self.io.spyfile.ncols
        hist_str = (' -> hs_process.batch.cube_to_spectra[<pixel number: '
                    '{0}>]'.format(n_pix))
        # Headers without a "history" entry start from an empty history
        metadata['history'] = metadata.get('history', '') + hist_str
        name_label_spec = os.path.splitext(name_label)[0] + '.spec'
        if write_geotiff is True:
            self._write_geotiff(array, fname, dir_out, name_label,
                                metadata, self.io.tools)
        # Now write spec (will change map info on metadata)
        self._write_spec(dir_out, name_label_spec, spec_mean, spec_std,
                         metadata)
def segment_composite_band(self, fname_list=None, base_dir=None,
                           search_ext='bip', dir_level=0, base_dir_out=None,
                           folder_name='composite_band',
                           name_append='composite-band',
                           write_geotiff=True, wl1=None, b1=None,
                           list_range=True, plot_out=True,
                           out_dtype=False, out_force=None, out_ext=False,
                           out_interleave=False, out_byteorder=False):
    '''
    Batch processing tool to create a composite band on multiple datacubes
    in the same way. ``batch.segment_composite_band`` is typically used
    prior to ``batch.segment_create_mask`` to generate the
    images/directory required for the masking process.

    Parameters:
        fname_list (``list``, optional): list of filenames to process; if
            left to ``None``, will look at ``base_dir``, ``search_ext``, and
            ``dir_level`` parameters for files to process (default:
            ``None``).
        base_dir (``str``, optional): directory path to search for files to
            process; ignored if ``fname_list`` is not ``None``. If both are
            ``None``, the ``base_dir`` stored on the ``batch`` object is
            used (default: ``None``).
        search_ext (``str``): file format/extension to search for; ignored
            if ``fname_list`` is not ``None`` (default: 'bip').
        dir_level (``int``): The number of directory levels to search
            (default: 0).
        base_dir_out (``str``, optional): output directory; passed through
            to the processing step (default: ``None``).
        folder_name (``str``): folder to save the processed images to
            (default: 'composite_band').
        name_append (``str``): name to append to the filename (default:
            'composite-band').
        write_geotiff (``bool``): whether to save the composite band image
            as a geotiff (default: ``True``).
        wl1 (``int``, ``float``, or ``list``): the wavelength (or set of
            wavelengths) to consolidate into the composite band; if
            ``list``, then consolidates all bands between two wavelength
            values by calculating the mean pixel value across all bands in
            that range (default: ``None``).
        b1 (``int``, ``float``, or ``list``): the band (or set of bands)
            to consolidate into the composite band; if ``list``, then
            consolidates all bands between two band values by calculating
            the mean pixel value across all bands in that range (default:
            ``None``).
        list_range (``bool``): Whether bands/wavelengths passed as a list
            is interpreted as a range of bands (``True``) or for each
            individual band in the list (``False``). If ``list_range`` is
            ``True``, ``b1``/``wl1`` should be a list with two items, and
            all bands/wavelengths between the two values will be used
            (default: ``True``).
        plot_out (``bool``): whether to save a histogram of the composite
            band result (default: ``True``).
        out_XXX: Settings for saving the output files; they are stored in
            ``batch.io.defaults``. See ``hsio.set_io_defaults()`` for more
            information on each of the settings.

    Raises:
        AssertionError: if ``fname_list`` and ``base_dir`` are both ``None``
            and no ``base_dir`` is stored on the ``batch`` object.
    '''
    self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                            out_byteorder)
    if fname_list is None:
        if base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate '
                   'which datacubes should be processed.\n')
            assert base_dir is not None, msg
        fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

    # Drop files that were already processed (only when not overwriting).
    # The expected output name is built from ``wl1``; if only ``b1`` was
    # passed the name cannot be derived here, so no files are pre-filtered
    # (``np.mean(None)`` would otherwise raise a TypeError).
    skip_processed = self.io.defaults.envi_write.force is False
    if skip_processed and wl1 is not None:
        append_extra = '-comp-{0}'.format(int(np.mean(wl1)))
        fname_list = self._check_processed(
            fname_list, base_dir_out, folder_name, name_append, append_extra)
    self._execute_composite_band(fname_list, base_dir_out, folder_name,
                                 name_append, write_geotiff, wl1, b1,
                                 list_range, plot_out)
def segment_band_math(self, fname_list=None, base_dir=None,
                      search_ext='bip', dir_level=0, base_dir_out=None,
                      folder_name='band_math', name_append='band-math',
                      write_geotiff=True, method='ndi', wl1=None, wl2=None,
                      wl3=None, b1=None, b2=None, b3=None,
                      list_range=True, plot_out=True,
                      out_dtype=False, out_force=None, out_ext=False,
                      out_interleave=False, out_byteorder=False):
    r'''
    Batch processing tool to perform band math on multiple datacubes in the
    same way. ``batch.segment_band_math`` is typically used prior to
    ``batch.segment_create_mask`` to generate the images/directory required
    for the masking process.

    Parameters:
        fname_list (``list``, optional): list of filenames to process; if
            left to ``None``, will look at ``base_dir``, ``search_ext``, and
            ``dir_level`` parameters for files to process (default:
            ``None``).
        base_dir (``str``, optional): directory path to search for files to
            process; ignored if ``fname_list`` is not ``None``. If both are
            ``None``, the ``base_dir`` stored on the ``batch`` object is
            used (default: ``None``).
        search_ext (``str``): file format/extension to search for; ignored
            if ``fname_list`` is not ``None`` (default: 'bip').
        dir_level (``int``): The number of directory levels to search
            (default: 0).
        base_dir_out (``str``, optional): output directory; passed through
            to the processing step (default: ``None``).
        folder_name (``str``): folder to save the processed images to
            (default: 'band_math').
        name_append (``str``): name to append to the filename (default:
            'band-math').
        method (``str``): Must be one of "ndi" (normalized difference
            index), "ratio" (simple ratio index), "derivative"
            (derivative-type index), or "mcari2" (modified chlorophyll
            absorption index2). Indicates what kind of band math should be
            performed on the input datacube. The "ndi" method leverages
            ``segment.band_math_ndi()``, the "ratio" method leverages
            ``segment.band_math_ratio()``, and the "derivative" method
            leverages ``segment.band_math_derivative()``. Please see the
            ``segment`` documentation for more information (default:
            "ndi").
        wl1 (``int``, ``float``, or ``list``): the wavelength (or set of
            wavelengths) to be used as the first parameter of the
            band math index; if ``list``, then consolidates all
            bands between two wavelength values by calculating the mean
            pixel value across all bands in that range (default: ``None``).
        wl2 (``int``, ``float``, or ``list``): the wavelength (or set of
            wavelengths) to be used as the second parameter of the
            band math index; if ``list``, then consolidates all
            bands between two wavelength values by calculating the mean
            pixel value across all bands in that range (default: ``None``).
        wl3 (``int``, ``float``, or ``list``): the wavelength (or set of
            wavelengths) to be used as the third parameter of the band
            math index; used by the "derivative" and "mcari2" methods
            (default: ``None``).
        b1 (``int``, ``float``, or ``list``): the band (or set of bands) to be
            used as the first parameter of the band math index;
            if ``list``, then consolidates all bands between two band values
            by calculating the mean pixel value across all bands in that
            range (default: ``None``).
        b2 (``int``, ``float``, or ``list``): the band (or set of bands) to be
            used as the second parameter of the band math
            index; if ``list``, then consolidates all bands between two band
            values by calculating the mean pixel value across all bands in
            that range (default: ``None``).
        b3 (``int``, ``float``, or ``list``): the band (or set of bands) to
            be used as the third parameter of the band math index
            (default: ``None``).
        list_range (``bool``): Whether bands/wavelengths passed as a list is
            interpreted as a range of bands (``True``) or for each individual
            band in the list (``False``). If ``list_range`` is ``True``,
            ``b1``/``wl1`` and ``b2``/``wl2`` should be lists with two items, and
            all bands/wavelengths between the two values will be used
            (default: ``True``).
        plot_out (``bool``): whether to save a histogram of the band math
            result (default: ``True``).
        write_geotiff (``bool``): whether to save the band math image as a
            geotiff alongside the band math raster (default: ``True``).
        out_XXX: Settings for saving the output files; they are stored in
            ``batch.io.defaults``. See ``hsio.set_io_defaults()`` for more
            information on each of the settings.

    Raises:
        ValueError: if ``method`` is not one of "ndi", "ratio",
            "derivative", or "mcari2".
        AssertionError: if ``fname_list`` and ``base_dir`` are both ``None``
            and no ``base_dir`` is stored on the ``batch`` object.

    Note:
        The following ``batch`` example builds on the API example results
        of the `spatial_mod.crop_many_gdf`_ function. Please complete the
        `spatial_mod.crop_many_gdf`_ example to be sure your directory
        (i.e., ``base_dir``) is populated with multiple hyperspectral
        datacubes. The following example will be using datacubes located in
        the following directory:
        ``F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf``

    Example:
        Load and initialize the ``batch`` module, checking to be sure the
        directory exists.

        >>> import os
        >>> from hs_process import batch
        >>> base_dir = r'F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf'
        >>> print(os.path.isdir(base_dir))
        True
        >>> hsbatch = batch(base_dir, search_ext='.bip')  # searches for all files in ``base_dir`` with a ".bip" file extension

        Use ``batch.segment_band_math`` to compute the MCARI2 (Modified
        Chlorophyll Absorption Ratio Index Improved; Haboudane et al.,
        2004) spectral index for each of the datacubes in ``base_dir``. See
        `Harris Geospatial`_ for more information about the MCARI2 spectral
        index and references to other spectral indices.

        >>> folder_name = 'band_math_mcari2-800-670-550'  # folder name can be modified to be more descriptive in what type of band math is being performed
        >>> method = 'mcari2'  # must be one of "ndi", "ratio", "derivative", or "mcari2"
        >>> wl1 = 800
        >>> wl2 = 670
        >>> wl3 = 550
        >>> hsbatch.segment_band_math(base_dir=base_dir, folder_name=folder_name,
                                      name_append='band-math', write_geotiff=True,
                                      method=method, wl1=wl1, wl2=wl2, wl3=wl3,
                                      plot_out=True, out_force=True)
        Bands used (``b1``): [198]
        Bands used (``b2``): [135]
        Bands used (``b3``): [77]
        Wavelengths used (``b1``): [799.0016]
        Wavelengths used (``b2``): [669.6752]
        Wavelengths used (``b3``): [550.6128]
        Saving F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\band_math_mcari2-800-670-550\Wells_rep2_20180628_16h56m_pika_gige_7_plot_1011-band-math-mcari2-800-670-550.bip
        ...

        ``batch.segment_band_math`` creates a new folder in ``base_dir``
        (in this case the new directory is
        ``F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\band_math_mcari2-800-670-550``)
        which contains several data products. The **first** is
        ``band-math-stats.csv``: a spreadsheet containing summary
        statistics for each of the image cubes that were processed via
        ``batch.segment_band_math``; stats include *pixel count*,
        *mean*, *standard deviation*, *median*, and *percentiles* across
        all image pixels.

        **Second** is a ``geotiff`` file for each of the image cubes after the
        band math processing. This can be opened in *QGIS* to visualize in
        a spatial reference system, or can be opened using any software
        that supports floating point *.tif* files.

        .. image:: img/batch/segment_band_math_plot_611-band-math-mcari2-800-670-550_tif.png

        **Third** is the band math raster saved in the *.hdr* file format.
        Note that the data contained here should be the same as in the
        *.tif* file, so it's a matter of preference as to what may be more
        useful. This single band *.hdr* can also be opened in *QGIS*.

        **Fourth** is a histogram of the band math data contained in the
        image. The histogram illustrates the 90th percentile value, which
        may be useful in the segmentation step (e.g., see
        `batch.segment_create_mask`_).

        .. image:: img/batch/segment_band_math_plot_611-band-math-mcari2-800-670-550.png

    .. _Harris Geospatial: https://www.harrisgeospatial.com/docs/NarrowbandGreenness.html#Modified3
    .. _batch.segment_create_mask: hs_process.batch.html#hs_process.batch.segment_create_mask
    '''
    if method not in ('ndi', 'ratio', 'derivative', 'mcari2'):
        raise ValueError('``method`` must be one of "ndi", "ratio", '
                         '"derivative", or "mcari2" (got {0!r}).\n'
                         ''.format(method))
    self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                            out_byteorder)
    if fname_list is None:
        if base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate '
                   'which datacubes should be processed.\n')
            assert base_dir is not None, msg
        fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
    # else fname_list must be passed directly

    # Drop files that were already processed (only when not overwriting)
    if self.io.defaults.envi_write.force is False:
        # "ndi" and "ratio" use two wavelengths; the others use three
        if method in ('ndi', 'ratio'):
            wl_used = (wl1, wl2)
        else:
            wl_used = (wl1, wl2, wl3)
        # The expected output name is built from the wavelengths; if band
        # numbers were passed instead, the name cannot be derived here, so
        # no files are pre-filtered (``np.mean(None)`` raises a TypeError)
        if all(wl is not None for wl in wl_used):
            append_extra = '-{0}-{1}'.format(
                method, '-'.join(str(int(np.mean(wl))) for wl in wl_used))
            fname_list = self._check_processed(
                fname_list, base_dir_out, folder_name, name_append,
                append_extra)
    self._execute_band_math(fname_list, base_dir_out, folder_name,
                            name_append, write_geotiff, method, wl1, wl2,
                            wl3, b1, b2, b3, list_range, plot_out)
def segment_create_mask(self, fname_list=None, base_dir=None,
                        search_ext='bip', dir_level=0, mask_dir=None,
                        base_dir_out=None,
                        folder_name='mask', name_append='mask',
                        write_datacube=True, write_spec=True,
                        write_geotiff=True, mask_thresh=None,
                        mask_percentile=None, mask_side='lower',
                        out_dtype=False, out_force=None, out_ext=False,
                        out_interleave=False, out_byteorder=False):
    r'''
    Batch processing tool to create a masked array on many datacubes.
    ``batch.segment_create_mask`` is typically used after
    ``batch.segment_band_math`` to mask all the datacubes in a directory
    based on the result of the band math process.

    Parameters:
        fname_list (``list``, optional): list of filenames to process; if
            left to ``None``, will look at ``base_dir``, ``search_ext``, and
            ``dir_level`` parameters for files to process (default:
            ``None``).
        base_dir (``str``, optional): directory path to search for files to
            process; ignored if ``fname_list`` is not ``None``. If both are
            ``None``, the ``base_dir`` stored on the ``batch`` object is
            used (default: ``None``).
        search_ext (``str``): file format/extension to search for; ignored
            if ``fname_list`` is not ``None`` (default: 'bip').
        dir_level (``int``): The number of directory levels to search
            (default: 0).
        mask_dir (``str``): directory containing the single-band images
            that are used to determine which datacube pixels are to be
            masked (default: ``None``).
        base_dir_out (``str``, optional): output directory; passed through
            to the masking step (default: ``None``).
        folder_name (``str``): folder to save the masked output to
            (default: 'mask').
        name_append (``str``): name to append to the filename (default:
            'mask').
        write_datacube (``bool``): passed through to the masking step;
            presumably whether to save the masked datacube (default:
            ``True``).
        write_spec (``bool``): passed through to the masking step;
            presumably whether to save the mean spectra of the unmasked
            pixels (default: ``True``).
        write_geotiff (``bool``): whether to save the masked RGB image as a
            geotiff alongside the masked datacube (default: ``True``).
        mask_thresh (``float`` or ``int``): The value for which to mask the
            array; should be used with the ``mask_side`` parameter
            (default: ``None``).
        mask_percentile (``float`` or ``int``): The percentile of pixels to
            mask; if ``mask_percentile``=95 and ``mask_side``='lower', the
            lowest 95% of pixels will be masked following the band math
            operation (default: ``None``; range: 0-100).
        mask_side (``str``): The side of the threshold for which to apply the
            mask. Must be either 'lower', 'upper', 'outside', or ``None``;
            if 'lower', everything below the threshold will be masked; if
            'outside', the ``mask_thresh`` / ``mask_percentile`` parameter
            must be list-like with two values indicating the lower and
            upper bounds - anything outside of these values will be masked
            out; if ``None``, only the values that exactly match the
            threshold will be masked (default: 'lower').
        out_XXX: Settings for saving the output files; they are stored in
            ``batch.io.defaults``. See ``hsio.set_io_defaults()`` for more
            information on each of the settings.

    Raises:
        AssertionError: if ``fname_list`` and ``base_dir`` are both ``None``
            and no ``base_dir`` is stored on the ``batch`` object.

    Note:
        The following ``batch`` example builds on the API example results
        of `spatial_mod.crop_many_gdf`_ and `batch.segment_band_math`_.
        Please complete each of those API examples to be sure your
        directories (i.e., ``base_dir``, and ``mask_dir``) are populated
        with image files. The following example will be masking datacubes
        located in:
        ``F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf``
        based on MCARI2 images located in:
        ``F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\band_math_mcari2-800-670-550``

    Example:
        Load and initialize the ``batch`` module, ensuring ``base_dir`` is
        a valid directory

        >>> import os
        >>> from hs_process import batch
        >>> base_dir = r'F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf'
        >>> print(os.path.isdir(base_dir))
        True
        >>> hsbatch = batch(base_dir, search_ext='.bip')  # searches for all files in ``base_dir`` with a ".bip" file extension

        There must be a single-band image that will be used to determine
        which datacube pixels are to be masked (determined via the
        ``mask_dir`` parameter). Point to the directory that contains the
        MCARI2 images.

        >>> mask_dir = os.path.join(base_dir, 'band_math_mcari2-800-670-550')
        >>> print(os.path.isdir(mask_dir))
        True

        Indicate how the MCARI2 images should be used to determine which
        hyperspectral pixels are to be masked. The available parameters for
        controlling this are ``mask_thresh``, ``mask_percentile``, and
        ``mask_side``. We will mask out all pixels that fall below the
        MCARI2 90th percentile.

        >>> mask_percentile = 90
        >>> mask_side = 'lower'

        Finally, indicate the folder to save the masked datacubes and
        perform the batch masking via ``batch.segment_create_mask``

        >>> folder_name = 'mask_mcari2_90th'
        >>> hsbatch.segment_create_mask(base_dir=base_dir, mask_dir=mask_dir,
                                        folder_name=folder_name,
                                        name_append='mask-mcari2-90th', write_geotiff=True,
                                        mask_percentile=mask_percentile,
                                        mask_side=mask_side)
        Saving F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th\Wells_rep2_20180628_16h56m_pika_gige_7_plot_1011-mask-mcari2-90th.bip
        Saving F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th\Wells_rep2_20180628_16h56m_pika_gige_7_plot_1011-mask-mcari2-90th-spec-mean.spec
        ...

        .. image:: img/batch/segment_create_mask_inline.png

        ``batch.segment_create_mask`` creates a new folder in ``base_dir``
        named according to the ``folder_name`` parameter
        (in this case the new directory is
        ``F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th``)
        which contains several data products. The **first** is
        ``mask-stats.csv``: a spreadsheet containing the band math
        threshold value for each image file. In this example, the MCARI2
        value corresponding to the 90th percentile is listed.

        +------------+------------+-------------+
        | fname      | plot_id    |lower-pctl-90|
        +============+============+=============+
        | ...        | 1011       | 0.83222     |
        +------------+------------+-------------+
        | ...        | 1012       | 0.81112     |
        +------------+------------+-------------+
        | ...        | 1013       | 0.74394     |
        +------------+------------+-------------+

        ...etc.

        **Second** is a ``geotiff`` file for each of the image cubes after the
        masking procedure. This can be opened in *QGIS* to visualize in
        a spatial reference system, or can be opened using any software
        that supports floating point *.tif* files. The masked pixels are
        saved as ``null`` values and should render transparently.

        .. image:: img/batch/segment_create_mask_geotiff.png

        **Third** is the full hyperspectral datacube, also with the masked
        pixels saved as ``null`` values. Note that the only pixels
        remaining are the 10% with the highest MCARI2 values.

        .. image:: img/batch/segment_create_mask_datacube.png

        **Fourth** is the mean spectra across the unmasked datacube pixels.
        This is illustrated above by the green line plot (the light green
        shadow represents the standard deviation for each band).

    .. _Harris Geospatial: https://www.harrisgeospatial.com/docs/NarrowbandGreenness.html#Modified3
    .. _batch.segment_band_math: hs_process.batch.html#hs_process.batch.segment_band_math
    '''
    self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                            out_byteorder)
    if fname_list is None:
        if base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate '
                   'which datacubes should be processed.\n')
            assert base_dir is not None, msg
        fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

    # Drop files that were already processed (only when not overwriting)
    if self.io.defaults.envi_write.force is False:
        fname_list = self._check_processed(fname_list, base_dir_out,
                                           folder_name, name_append)
    self._execute_mask(fname_list, mask_dir, base_dir_out, folder_name,
                       name_append, write_datacube, write_spec,
                       write_geotiff, mask_thresh, mask_percentile,
                       mask_side)
[docs] def spatial_crop(self, fname_sheet=None, base_dir=None, search_ext='bip',
dir_level=0, base_dir_out=None,
folder_name='spatial_crop', name_append='spatial-crop',
write_geotiff=True, method='single', gdf=None, out_dtype=False,
out_force=None, out_ext=False, out_interleave=False,
out_byteorder=False):
'''
Iterates through a spreadsheet that provides necessary information
about how each image should be cropped and how it should be saved.
If ``gdf`` is passed (a geopandas.GoeDataFrame polygon file), the
cropped images will be shifted to the center of appropriate 'plot_id'
polygon.
Parameters:
fname_sheet (``fname``, ``pandas.DataFrame``, or ``None``, optional):
The filename of the spreadsheed that provides the
necessary information for fine-tuning the batch process
cropping. See below for more information about the required and
optional contents of ``fname_sheet`` and how to properly format
it. Optionally, ``fname_sheet`` can be a ``Pandas.DataFrame``.
If left to ``None``, ``base_dir`` and ``gdf`` must be passed.
base_dir (``str``, optional): directory path to search for files to
spatially crop; if ``fname_sheet`` is not ``None``,
``base_dir`` will be ignored (default: ``None``).
base_dir_out (``str``, optional): output directory of the cropped
image (default: ``None``).
folder_name (``str``, optional): folder to add to ``base_dir_out``
to save all the processed datacubes (default: 'spatial_crop').
name_append (``str``, optional): name to append to the filename
(default: 'spatial-crop').
write_geotiff (``bool``, optional): whether to save an RGB image as
a geotiff alongside the cropped datacube.
method (``str``, optional): Must be one of "single" or
"many_gdf". Indicates whether a single plot should be cropped
from the input datacube or if many/multiple plots should be
cropped from the input datacube. The "single" method leverages
`spatial_mod.crop_single()`_ and the "many_gdf" method
leverages `spatial_mod.crop_many_gdf()`_. Please
see the ``spatial_mod`` documentation for more information
(default: "single").
gdf (``geopandas.GeoDataFrame``, optional): the plot names and
polygon geometery of each of the plots; 'plot_id' must be used as
a column name to identify each of the plots, and should be an
integer.
out_XXX: Settings for saving the output files can be adjusted here
if desired. They are stored in ``batch.io.defaults``, and are
therefore accessible at a high level. See
`hsio.set_io_defaults()`_ for more information on each of the
settings.
**Tips and Tricks for** ``fname_sheet`` **when** ``gdf`` **is not passed**
If ``gdf`` is not passed, ``fname_sheet`` may have the following
required column headings that correspond to the relevant parameters in
`spatial_mod.crop_single()`_ and `spatial_mod.crop_many_gdf()`_:
#. "directory"
#. "name_short"
#. "name_long"
#. "ext"
#. "pix_e_ul"
#. "pix_n_ul".
With this minimum input, ``batch.spatial_crop`` will read in each
image, crop from the upper left pixel (determined as
``pix_e_ul``/``pix_n_ul``) to the lower right pixel calculated
based on ``crop_e_pix``/``crop_n_pix`` (which is the width of the
cropped area in units of pixels).
Note:
``crop_e_pix`` and ``crop_n_pix`` have default values (see
`defaults.crop_defaults()`_), but they can also be passed
specifically for each datacube by including appropriate columns in
``fname_sheet`` (which takes precedence over
``defaults.crop_defaults``).
``fname_sheet`` may also have the following optional column headings:
#. "crop_e_pix"
#. "crop_n_pix"
#. "crop_e_m"
#. "crop_n_m"
#. "buf_e_pix"
#. "buf_n_pix"
#. "buf_e_m"
#. "buf_n_m"
#. "gdf_shft_e_m"
#. "gdf_shft_n_m"
#. "plot_id_ref"
#. "study"
#. "date"
**More** ``fname_sheet`` **Tips and Tricks**
#. These optional inputs passed via ``fname_sheet`` allow more control
over exactly how the images are to be cropped. For a more detailed
explanation of the information that many of these columns are
intended to contain, see the documentation for
`spatial_mod.crop_single()`_ and `spatial_mod.crop_many_gdf()`_.
Those parameters not referenced should be apparent in the API
examples and tutorials.
#. If the column names are different in ``fname_sheet`` than described
here, `defaults.spat_crop_cols()`_ can be modified to indicate which
columns correspond to the relevant information.
#. The *date* and *study* columns do not impact how the datacubes are
to be cropped, but if this information exists,
``batch.spatial_crop`` adds it to the filename of the cropped
datacube. This can be used to avoid overwriting datacubes with
similar names, and is especially useful when processing imagery from
many dates and/or studies/locations and saving them in the same
directory. If "study", "date", and "plot_id" are all passed, this
information is used to formulate the output file name; e.g.,
*study_wells_date_20180628_plot_527-spatial-crop.bip*. If either
"study" or "date" is missing, the populated variables wil be
appended to the end of the ``hsio.name_short`` string; e.g.,
*plot_9_3_pika_gige_1_plot_527-spatial-crop.bip*.
#. Any other columns can be added to ``fname_sheet``, but
``batch.spatial_crop()`` does not use them in any way.
Note:
The following ``batch`` example only actually processes *a single*
hyperspectral image. If more datacubes were present in
``base_dir``, however, ``batch.spatial_crop`` would process all
datacubes that were available.
Note:
This example uses ``spatial_mod.crop_many_gdf`` to crop many
plots from a datacube using a polygon geometry file describing the
spatial extent of each plot.
Example:
Load and initialize the ``batch`` module, checking to be sure the
directory exists.
>>> import os
>>> import geopandas as gpd
>>> import pandas as pd
>>> from hs_process import batch
>>> base_dir = r'F:\\nigo0024\Documents\hs_process_demo'
>>> print(os.path.isdir(base_dir))
>>> hsbatch = batch(base_dir, search_ext='.bip', dir_level=0,
progress_bar=True) # searches for all files in ``base_dir`` with a ".bip" file extension
True
Load the plot geometry as a ``geopandas.GeoDataFrame``
>>> fname_gdf = r'F:\\nigo0024\Documents\hs_process_demo\plot_bounds.geojson'
>>> gdf = gpd.read_file(fname_gdf)
Perform the spatial cropping using the *"many_gdf"* `method`. Note
that nothing is being passed to `fname_sheet` here, so
``batch.spatial_crop`` is simply going to attempt to crop all plots
contained within `gdf` that overlap with any datacubes in
``base_dir``.
Passing ``fname_sheet`` directly is definitely more flexible for
customization. However, some customization is possible while not
passing ``fname_sheet``. In the example below, we set an easting
and northing buffer, as well as limit the number of plots to crop
to 40. These defaults trickle through to
``spatial_mod.crop_many_gdf()``, so by setting them on the
``batch`` object, they will be recognized when calculating crop
boundaries from ``gdf``.
>>> hsbatch.io.defaults.crop_defaults.buf_e_m = 2 # Sets buffer in the easting direction (units of meters)
>>> hsbatch.io.defaults.crop_defaults.buf_n_m = 0.5
>>> hsbatch.io.defaults.crop_defaults.n_plots = 40 # We can limit the number of plots to process from gdf
>>> hsbatch.spatial_crop(base_dir=base_dir, method='many_gdf',
gdf=gdf, out_force=True)
Because ``fname_list`` was passed instead of ``fname_sheet``, there is not a way to infer the study name and date. Therefore, "study" and "date" will be omitted from the output file name. If you would like output file names to include "study" and "date", please pass ``fname_sheet`` with "study" and "date" columns.
Processing file 39/40: 100%|██████████| 40/40 [00:02<00:00, 17.47it/s]
.. image:: img/batch/spatial_crop_inline.png
A new folder was created in ``base_dir``
- ``F:\\nigo0024\Documents\hs_process_demo\spatial_crop`` - that
contains the cropped datacubes and the cropped ``geotiff`` images.
The Plot ID from the ``gdf`` is used to name each datacube
according to its plot ID. The ``geotiff`` images can be opened in
*QGIS* to visualize the images after cropping them.
.. image:: img/batch/spatial_crop_tifs.png
The cropped images were brightened in *QGIS* to emphasize the
cropped boundaries. The plot boundaries are overlaid for reference
(notice the 2.0 m buffer on the East/West ends and the 0.5 m buffer
on the North/South sides).
.. _defaults.crop_defaults(): hs_process.defaults.html#hs_process.defaults.crop_defaults
.. _defaults.spat_crop_cols(): hs_process.defaults.html#hs_process.defaults.spat_crop_cols
.. _hsio.set_io_defaults(): hs_process.hsio.html#hs_process.hsio.set_io_defaults
.. _spatial_mod.crop_single(): hs_process.spatial_mod.html#hs_process.spatial_mod.crop_single
.. _spatial_mod.crop_many_gdf(): hs_process.spatial_mod.html#hs_process.spatial_mod.crop_many_gdf
'''
if method == 'many_gdf':
msg1 = ('Please pass a valid ``geopandas.GeoDataFrame`` if using '
'the "many_gdf" method.\n')
msg2 = ('Please be sure the passed ``geopandas.GeoDataFrame`` has '
'a column by the name of "plot_id", indicating the plot '
'ID for each polygon geometry if using the "many_gdf" '
'method.\n')
assert isinstance(gdf, gpd.GeoDataFrame), msg1
assert 'plot_id' in gdf.columns, msg2
self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
out_byteorder)
if fname_sheet is None and base_dir is not None:
fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
elif fname_sheet is None and base_dir is None:
# base_dir may have been stored to the ``batch`` object
base_dir = self.base_dir
msg = ('Please set ``fname_list`` or ``base_dir`` to indicate which '
'datacubes should be processed.\n')
assert base_dir is not None, msg
fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
else: # fname_list comes from fname_sheet
fname_list = None
# Either fname_sheet or fname_list should be None
self._execute_crop(fname_sheet, fname_list, base_dir_out,
folder_name, name_append, write_geotiff, method, gdf)
def spectra_combine(self, fname_list=None, base_dir=None,
                    search_ext='bip', dir_level=0, base_dir_out=None,
                    out_dtype=False, out_force=None, out_ext=False,
                    out_interleave=False, out_byteorder=False):
    r'''
    Batch processing tool to gather all pixels from every image in a
    directory, compute the mean and standard deviation, and save as a
    single spectra (i.e., a spectra file is equivalent to a single spectral
    pixel with no spatial information).

    Parameters:
        fname_list (``list``, optional): list of filenames to process; if
            left to ``None``, will look at ``base_dir``, ``search_ext``,
            and ``dir_level`` parameters for files to process (default:
            ``None``).
        base_dir (``str``, optional): directory path to search for files to
            combine; must be an existing directory, and is stored on the
            ``batch`` object when used. If ``fname_list`` is not ``None``,
            ``base_dir`` will be ignored. If both are ``None``, the
            ``base_dir`` already stored on the ``batch`` object is used
            (default: ``None``).
        search_ext (``str``): file format/extension to search for in all
            directories and subdirectories to determine which files to
            process; if ``fname_list`` is not ``None``, ``search_ext`` will
            be ignored (default: 'bip').
        dir_level (``int``): The number of directory levels to search; if
            ``None``, searches all directory levels (default: 0).
        base_dir_out (``str``): directory path used as the output location
            for the combined spectra; it is handed to the internal combine
            routine unchanged (default: ``None``). In the example below,
            leaving it as ``None`` saved the spectra inside ``base_dir``.
        out_XXX: Settings for saving the output files can be adjusted here
            if desired. They are stored in ``batch.io.defaults``, and are
            therefore accessible at a high level. See
            ``hsio.set_io_defaults()`` for more information on each of the
            settings.

    Raises:
        AssertionError: if ``base_dir`` is passed but is not an existing
            directory, or if no ``fname_list``/``base_dir`` is available.

    Note:
        The following example will load in several small hyperspectral
        radiance datacubes *(not reflectance)* that were previously cropped
        manually (via Spectronon software). These datacubes represent the
        radiance values of grey reference panels that were placed in the
        field to provide data necessary for converting radiance imagery
        to reflectance. These particular datacubes were extracted
        from several different images captured within ~10 minutes of each
        other.

    Example:
        Load and initialize the ``batch`` module, checking to be sure the
        directory exists.

        >>> import os
        >>> from hs_process import batch
        >>> base_dir = r'F:\nigo0024\Documents\hs_process_demo\cube_ref_panels'
        >>> print(os.path.isdir(base_dir))
        True
        >>> hsbatch = batch(base_dir)

        Combine all the *radiance* datacubes in the directory via
        ``batch.spectra_combine``.

        >>> hsbatch.spectra_combine(base_dir=base_dir, search_ext='bip',
        ...                         dir_level=0)
        Combining datacubes/spectra into a single mean spectra.
        Number of input datacubes/spectra: 7
        Total number of pixels: 1516
        Saving F:\nigo0024\Documents\hs_process_demo\cube_ref_panels\spec_mean_spy.spec

        Visualize the combined spectra by opening in *Spectronon*. The
        solid line represents the mean radiance spectra across all pixels
        and images in ``base_dir``, and the lighter, slightly transparent
        line represents the standard deviation of the radiance across all
        pixels and images in ``base_dir``.

        .. image:: img/batch/spectra_combine.png

        Notice the lower signal at the oxygen absorption region (near 770
        nm). After converting datacubes to reflectance, it may be
        desireable to spectrally clip this region (see
        `spec_mod.spectral_clip()`_)

    .. _spec_mod.spectral_clip(): hs_process.spec_mod.html#hs_process.spec_mod.spectral_clip
    '''
    self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                            out_byteorder)
    # An explicit ``fname_list`` always wins; the directory search only runs
    # when the caller did not name the files.
    if fname_list is None:
        if base_dir is not None:
            # The asserts are kept (rather than raising another exception
            # type) so callers that catch AssertionError keep working.
            msg1 = ('``base_dir`` is not a valid directory.\n')
            assert os.path.isdir(base_dir), msg1
            self.base_dir = base_dir
        else:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg2 = ('Please set ``fname_list`` or ``base_dir`` to indicate '
                    'which datacubes should be processed.\n')
            assert base_dir is not None, msg2
        fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
    # NOTE(review): how ``base_dir_out=None`` is resolved is decided inside
    # ``_execute_spec_combine`` and is not visible from this method.
    self._execute_spec_combine(fname_list, base_dir_out)
def spectra_derivative(
        self, fname_list=None, base_dir=None, search_ext='spec',
        dir_level=0, base_dir_out=None, folder_name='spec_derivative',
        name_append='spec-derivative', order=1,
        out_dtype=False, out_force=None, out_ext=False,
        out_interleave=False, out_byteorder=False):
    r'''
    Batch processing tool to calculate the numeric spectral derivative for
    multiple spectra.

    Parameters:
        fname_list (``list``, optional): list of filenames to process; if
            left to ``None``, will look at ``base_dir``, ``search_ext``,
            and ``dir_level`` parameters for files to process (default:
            ``None``).
        base_dir (``str``, optional): directory path to search for files to
            process; if ``fname_list`` is not ``None``, ``base_dir`` will
            be ignored. If both are ``None``, the ``base_dir`` stored on
            the ``batch`` object is used (default: ``None``).
        search_ext (``str``): file format/extension to search for in all
            directories and subdirectories to determine which files to
            process; if ``fname_list`` is not ``None``, ``search_ext`` will
            be ignored (default: 'spec').
        dir_level (``int``): The number of directory levels to search; if
            ``None``, searches all directory levels (default: 0).
        base_dir_out (``str``): directory path to save all processed
            spectra; if set to ``None``, a folder named according to the
            ``folder_name`` parameter is added to the directory of each
            input file (default: ``None``).
        folder_name (``str``): folder to add to ``base_dir_out`` to save
            all the processed spectra (default: 'spec_derivative').
        name_append (``str``): name to append to the filename; when it is
            not empty, "-order-<order>" is added to it (default:
            'spec-derivative').
        order (``int``): The order of the derivative (default: 1).
        out_XXX: Settings for saving the output files can be adjusted here
            if desired. They are stored in ``batch.io.defaults``, and are
            therefore accessible at a high level. See
            ``hsio.set_io_defaults()`` for more information on each of the
            settings.

    Raises:
        AssertionError: if neither ``fname_list`` nor any ``base_dir`` is
            available.

    Note:
        The following `batch` example builds on the API example results
        of the `batch.cube_to_spectra`_ function. Please complete
        both the `spatial_mod.crop_many_gdf`_ and
        `batch.cube_to_spectra`_ examples to be sure your directory
        (i.e., `base_dir`) is populated with multiple hyperspectral
        spectra. The following example will be using spectra located in the
        following directory:
        ``F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\cube_to_spec``

    Example:
        Load and initialize the ``batch`` module, checking to be sure the
        directory exists.

        >>> import os
        >>> from hs_process import batch
        >>> data_dir = r'F:\nigo0024\Documents\hs_process_demo'
        >>> base_dir = os.path.join(data_dir, 'spatial_mod', 'crop_many_gdf', 'cube_to_spec')
        >>> print(os.path.isdir(base_dir))
        True
        >>> hsbatch = batch(base_dir, search_ext='.spec', progress_bar=True)

        Use ``batch.spectra_derivative`` to calculate the central finite
        difference (i.e., the numeric spectral derivative) for each of the
        .spec files in ``base_dir``.

        >>> order = 1
        >>> hsbatch.spectra_derivative(base_dir=base_dir, order=order, out_force=True)

        Use seaborn to visualize the derivative spectra of plots 1011,
        1012, and 1013.

        >>> import seaborn as sns
        >>> import re
        >>> fname_list = [os.path.join(base_dir, 'spec_derivative', 'Wells_rep2_20180628_16h56m_pika_gige_7_plot_1011-spec-derivative-order-{0}.spec'.format(order)),
        ...               os.path.join(base_dir, 'spec_derivative', 'Wells_rep2_20180628_16h56m_pika_gige_7_plot_1012-spec-derivative-order-{0}.spec'.format(order)),
        ...               os.path.join(base_dir, 'spec_derivative', 'Wells_rep2_20180628_16h56m_pika_gige_7_plot_1013-spec-derivative-order-{0}.spec'.format(order))]
        >>> ax = None
        >>> for fname in fname_list:
        ...     hsbatch.io.read_spec(fname)
        ...     meta_bands = list(hsbatch.io.tools.meta_bands.values())
        ...     data = hsbatch.io.spyfile_spec.open_memmap().flatten() * 100
        ...     hist = hsbatch.io.spyfile_spec.metadata['history']
        ...     pix_n = re.search(r'<pixel number: (.*)>] ->', hist, flags=re.DOTALL).group(1)
        ...     if ax is None:
        ...         ax = sns.lineplot(x=meta_bands, y=[0] * len(meta_bands), color='gray')
        ...         ax = sns.lineplot(x=meta_bands, y=data, label='Plot '+hsbatch.io.name_plot+' (n='+pix_n+')')
        ...     else:
        ...         ax = sns.lineplot(x=meta_bands, y=data, label='Plot '+hsbatch.io.name_plot+' (n='+pix_n+')', ax=ax)
        >>> ax.set(ylim=(-1, 1))
        >>> ax.set_xlabel('Wavelength (nm)', weight='bold')
        >>> ax.set_ylabel('Derivative reflectance (%)', weight='bold')
        >>> ax.set_title(r'API Example: `batch.spectra_derivative`', weight='bold')

        .. image:: img/batch/spectra_derivative.png

    .. _spatial_mod.crop_many_gdf: hs_process.spatial_mod.html#hs_process.spatial_mod.crop_many_gdf
    .. _batch.cube_to_spectra: hs_process.batch.html#hs_process.batch.cube_to_spectra
    '''
    self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                            out_byteorder)
    if fname_list is None:
        if base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate '
                   'which spectra should be processed.\n')
            assert base_dir is not None, msg
        fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

    if name_append:
        name_append = '{0}-order-{1}'.format(name_append, order)
    if self.io.defaults.envi_write.force is False:  # otherwise just overwrites if it exists
        fname_list = self._check_processed(fname_list, base_dir_out,
                                           folder_name, name_append,
                                           ext='.spec')

    show_progress = self.progress_bar is True
    n_files = len(fname_list)
    fname_list_p = tqdm(fname_list) if show_progress else fname_list
    # Count from 1 so the final description reads "N/N" instead of "N-1/N".
    for file_n, fname in enumerate(fname_list_p, start=1):
        if show_progress:
            fname_list_p.set_description(
                'Processing file {0}/{1}'.format(file_n, n_files))
        self.io.read_spec(fname)
        # Without an explicit output directory, write next to each input.
        if base_dir_out is None:
            dir_parent = os.path.dirname(fname)
        else:
            dir_parent = base_dir_out
        # Keep the helper's version of the suffix in a per-file local, so
        # whatever it does to ``name_append`` is never fed back into the
        # next iteration.
        dir_out, name_suffix = self._save_file_setup(
            dir_parent, folder_name, name_append)
        name_print = self._get_name_print()
        name_label = (name_print + name_suffix + '.spec')
        if self._file_exists_check(dir_out, name_label,
                                   write_spec=True) is True:
            continue

        self.my_spectral_mod = spec_mod(self.io.spyfile_spec)
        spec_dydx, metadata_dydx = self.my_spectral_mod.spec_derivative(
            spyfile_spec=self.my_spectral_mod.spyfile, order=order)
        self._write_spec(dir_out, name_label, spec_dydx,
                         spec_std=None, metadata=metadata_dydx)
def spectra_to_csv(self, fname_list=None, base_dir=None, search_ext='spec',
                   dir_level=0, base_dir_out=None, name='stats-spectra',
                   multithread=False):
    r'''
    Reads all the ``.spec`` files in a directory and saves their reflectance
    information to a ``.csv``. ``batch.spectra_to_csv`` is identical to
    ``batch.spectra_to_df`` except a ``.csv`` file is saved rather than
    returning a ``pandas.DataFrame``.

    Parameters:
        fname_list (``list``, optional): list of filenames to process; if
            left to ``None``, will look at ``base_dir``, ``search_ext``, and
            ``dir_level`` parameters for files to process (default: ``None``).
        base_dir (``str``, optional): directory path to search for files to
            read; if ``fname_list`` is not ``None``, ``base_dir`` is not
            searched. If both are ``None``, the ``base_dir`` stored on the
            ``batch`` object is used (default: ``None``).
        search_ext (``str``): file format/extension to search for in all
            directories and subdirectories to determine which files to
            process; if ``fname_list`` is not ``None``, ``search_ext`` will
            be ignored (default: 'spec').
        dir_level (``int``): The number of directory levels to search; if
            ``None``, searches all directory levels (default: 0).
        base_dir_out (``str``): directory path to save the ``.csv`` file;
            if set to ``None``, the file is saved to ``base_dir``. If
            ``base_dir`` is ``None`` as well, the ``base_dir`` stored on
            the ``batch`` object is used, and failing that the directory of
            the first file in ``fname_list`` (default: ``None``).
        name (``str``): The output filename (default: "stats-spectra").
        multithread (``bool``): Currently has no effect; the ``.spec``
            files are always read one after another. The parameter is kept
            so existing calls keep working (default: ``False``).

    Raises:
        AssertionError: if neither ``fname_list`` nor any ``base_dir`` is
            available.

    Note:
        The following example builds on the API example results of
        `batch.segment_band_math()`_ and `batch.segment_create_mask()`_.
        Please complete each of those API examples to be sure your
        directory (i.e.,
        ``F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th``)
        is populated with image files.

    Example:
        Load and initialize the ``batch`` module, checking to be sure the
        directory exists.

        >>> import os
        >>> from hs_process import batch
        >>> base_dir = r'F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th'
        >>> print(os.path.isdir(base_dir))
        True
        >>> hsbatch = batch(base_dir)

        Read all the ``.spec`` files in ``base_dir`` and save them to a
        ``.csv`` file.

        >>> hsbatch.spectra_to_csv(base_dir=base_dir, search_ext='spec',
        ...                        dir_level=0)
        Writing mean spectra to a .csv file.
        Number of input datacubes/spectra: 40
        Output file location: F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th\stats-spectra.csv

        When ``stats-spectra.csv`` is opened in Microsoft Excel, we can see
        that each row is a ``.spec`` file from a different plot, and each
        column is a particular spectral band/wavelength.

        .. image:: img/batch/spectra_to_csv.png

    .. _batch.segment_band_math(): hs_process.batch.html#hs_process.batch.segment_band_math
    .. _batch.segment_create_mask(): hs_process.batch.html#hs_process.batch.segment_create_mask
    '''
    if fname_list is None:
        if base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate which '
                   'datacubes should be processed.\n')
            assert base_dir is not None, msg
        fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

    # Resolve the output directory. When only ``fname_list`` was passed,
    # both ``base_dir_out`` and ``base_dir`` are None and joining on None
    # would raise a TypeError, so fall back to a directory that is known.
    dir_csv = base_dir_out if base_dir_out is not None else base_dir
    if dir_csv is None:
        dir_csv = self.base_dir
    if dir_csv is None:
        dir_csv = os.path.dirname(fname_list[0])
    fname_csv = os.path.join(dir_csv, name + '.csv')
    print('Writing mean spectra to a .csv file.\n'
          'Number of input datacubes/spectra: {0}\nOutput file location: '
          '{1}'.format(len(fname_list), fname_csv))

    # read first file to build df_spec column headings
    self.io.read_spec(fname_list[0] + '.hdr')
    meta_bands = self.io.tools.meta_bands
    # The header is two rows deep: the column labels carry the
    # ``meta_bands`` values and the first data row carries its keys.
    columns = [np.nan, 'wavelength'] + list(meta_bands.values())
    bands = ['fname', 'plot_id'] + list(meta_bands.keys())
    df_header = pd.DataFrame(data=[bands], columns=columns)

    # ``DataFrame.append`` was removed in pandas 2.0 and copied the whole
    # frame on every call; gather the per-file frames and concatenate once.
    # The helper is expected to return a frame labelled with the columns it
    # is handed.
    frames = [df_header]
    frames.extend(self._read_spectra_from_file(fname, df_header.columns)
                  for fname in fname_list)
    df_spec = pd.concat(frames)
    df_spec.to_csv(fname_csv, index=False)
def spectra_to_df(self, fname_list=None, base_dir=None, search_ext='spec',
                  dir_level=0, multithread=False):
    r'''
    Reads all the .spec files in a directory and returns their data as a
    ``pandas.DataFrame`` object. ``batch.spectra_to_df`` is identical to
    ``batch.spectra_to_csv`` except a ``pandas.DataFrame`` is returned
    rather than saving a ``.csv`` file.

    Parameters:
        fname_list (``list``, optional): list of filenames to process; if
            left to ``None``, will look at ``base_dir``, ``search_ext``, and
            ``dir_level`` parameters for files to process (default: ``None``).
        base_dir (``str``, optional): directory path to search for files to
            read; if ``fname_list`` is not ``None``, ``base_dir`` will
            be ignored. If both are ``None``, the ``base_dir`` stored on
            the ``batch`` object is used (default: ``None``).
        search_ext (``str``): file format/extension to search for in all
            directories and subdirectories to determine which files to
            process; if ``fname_list`` is not ``None``, ``search_ext`` will
            be ignored (default: 'spec').
        dir_level (``int``): The number of directory levels to search; if
            ``None``, searches all directory levels (default: 0).
        multithread (``bool``): Currently has no effect; the ``.spec``
            files are always read one after another. The parameter is kept
            so existing calls keep working (default: ``False``).

    Returns:
        ``pandas.DataFrame``: one row per ``.spec`` file, with "fname" and
        "plot_id" columns followed by one column per spectral band. The
        index is reset to 0..n-1, and "plot_id" is converted to a numeric
        type when every value allows it.

    Raises:
        AssertionError: if neither ``fname_list`` nor any ``base_dir`` is
            available.

    Note:
        The following example builds on the API example results of
        `batch.segment_band_math()`_ and `batch.segment_create_mask()`_.
        Please complete each of those API examples to be sure your
        directory (i.e.,
        ``F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th``)
        is populated with image files.

    Example:
        Load and initialize the ``batch`` module, checking to be sure the
        directory exists.

        >>> import os
        >>> from hs_process import batch
        >>> base_dir = r'F:\nigo0024\Documents\hs_process_demo\spatial_mod\crop_many_gdf\mask_mcari2_90th'
        >>> print(os.path.isdir(base_dir))
        True
        >>> hsbatch = batch(base_dir)

        Read all the ``.spec`` files in ``base_dir`` and load them to
        ``df_spec``, a ``pandas.DataFrame``.

        >>> df_spec = hsbatch.spectra_to_df(base_dir=base_dir, search_ext='spec',
        ...                                 dir_level=0)
        Writing mean spectra to a ``pandas.DataFrame``.
        Number of input datacubes/spectra: 40

        When visualizing ``df_spec`` in `Spyder`_, we can see that each row
        is a ``.spec`` file from a different plot, and each column is a
        particular spectral band.

        .. image:: img/batch/spectra_to_df.png

        It is somewhat confusing to conceptualize spectral data by band
        number (as opposed to the wavelength it represents).
        ``hs_process.hs_tools.get_band`` can be used to retrieve
        spectral data for all plots via indexing by wavelength. Say we need
        to access reflectance at 710 nm for each plot.

        >>> df_710nm = df_spec[['fname', 'plot_id', hsbatch.io.tools.get_band(710)]]

        .. image:: img/batch/spectra_to_df_710nm.png

    .. _batch.segment_band_math(): hs_process.batch.html#hs_process.batch.segment_band_math
    .. _batch.segment_create_mask(): hs_process.batch.html#hs_process.batch.segment_create_mask
    .. _Spyder: https://www.spyder-ide.org/
    '''
    if fname_list is None:
        if base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate which '
                   'datacubes should be processed.\n')
            assert base_dir is not None, msg
        fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
    print('Writing mean spectra to a ``pandas.DataFrame``.\n'
          'Number of input datacubes/spectra: {0}'
          ''.format(len(fname_list)))

    # read first file to build df_spec column headings
    self.io.read_spec(fname_list[0] + '.hdr')
    meta_bands = self.io.tools.meta_bands
    bands = ['fname', 'plot_id'] + list(meta_bands.keys())
    df_header = pd.DataFrame(columns=bands)

    # ``DataFrame.append`` was removed in pandas 2.0 and copied the whole
    # frame on every call; gather the per-file frames and concatenate once.
    # The empty header frame is left out of the concat on purpose: newer
    # pandas warns about empty entries and lets them force object dtype.
    # The helper is expected to return a frame labelled with the columns it
    # is handed, so the column layout is unchanged.
    frames = [self._read_spectra_from_file(fname, df_header.columns)
              for fname in fname_list]
    df_spec = pd.concat(frames)
    try:
        df_spec['plot_id'] = pd.to_numeric(df_spec['plot_id'])
    except ValueError:
        # Best effort only: non-numeric plot IDs are kept as they are.
        print('Unable to convert "plot_id" column to numeric type.\n')
    return df_spec.reset_index(drop=True)
def spectral_clip(self, fname_list=None, base_dir=None, search_ext='bip',
                  dir_level=0, base_dir_out=None, folder_name='spec_clip',
                  name_append='spec-clip',
                  wl_bands=None,
                  out_dtype=False, out_force=None, out_ext=False,
                  out_interleave=False, out_byteorder=False):
    r'''
    Batch processing tool to spectrally clip multiple datacubes in the same
    way.

    Parameters:
        fname_list (``list``, optional): list of filenames to process; if
            left to ``None``, will look at ``base_dir``, ``search_ext``, and
            ``dir_level`` parameters for files to process (default: ``None``).
        base_dir (``str``, optional): directory path to search for files to
            spectrally clip; if ``fname_list`` is not ``None``, ``base_dir`` will
            be ignored. If both are ``None``, the ``base_dir`` stored on
            the ``batch`` object is used (default: ``None``).
        search_ext (``str``): file format/extension to search for in all
            directories and subdirectories to determine which files to
            process; if ``fname_list`` is not ``None``, ``search_ext`` will
            be ignored (default: 'bip').
        dir_level (``int``): The number of directory levels to search; if
            ``None``, searches all directory levels (default: 0).
        base_dir_out (``str``): directory path to save all processed
            datacubes; if set to ``None``, a folder named according to the
            ``folder_name`` parameter is added to ``base_dir``
        folder_name (``str``): folder to add to ``base_dir_out`` to save all
            the processed datacubes (default: 'spec_clip').
        name_append (``str``): name to append to the filename (default:
            'spec-clip').
        wl_bands (``list`` or ``list of lists``): minimum and maximum
            wavelengths to clip from image; if multiple groups of
            wavelengths should be cut, this should be a list of lists. For
            example, wl_bands=[760, 776] will clip all bands greater than
            760.0 nm and less than 776.0 nm;
            wl_bands = [[0, 420], [760, 776], [813, 827], [880, 1000]]
            will clip all band less than 420.0 nm, bands greater than 760.0
            nm and less than 776.0 nm, bands greater than 813.0 nm and less
            than 827.0 nm, and bands greater than 880 nm. If left to
            ``None``, ``[[0, 420], [760, 776], [813, 827], [880, 1000]]``
            is used (default: ``None``).
        out_XXX: Settings for saving the output files can be adjusted here
            if desired. They are stored in ``batch.io.defaults``, and are
            therefore accessible at a high level. See
            ``hsio.set_io_defaults()`` for more information on each of the
            settings.

    Raises:
        AssertionError: if neither ``fname_list`` nor any ``base_dir`` is
            available.

    Note:
        The following ``batch`` example builds on the API example results
        of the `batch.spatial_crop`_ function. Please complete the
        `batch.spatial_crop`_ example to be sure your directory
        (i.e., ``base_dir``) is populated with multiple hyperspectral
        datacubes. The following example will be using datacubes located in
        the following directory:
        ``F:\nigo0024\Documents\hs_process_demo\spatial_crop``

    Example:
        Load and initialize the ``batch`` module, checking to be sure the
        directory exists.

        >>> import os
        >>> from hs_process import batch
        >>> base_dir = r'F:\nigo0024\Documents\hs_process_demo\spatial_crop'
        >>> print(os.path.isdir(base_dir))
        True
        >>> hsbatch = batch(base_dir, search_ext='.bip', progress_bar=True)  # searches for all files in ``base_dir`` with a ".bip" file extension

        Use ``batch.spectral_clip`` to clip all spectral bands below
        *420 nm* and above *880 nm*, as well as the bands near the oxygen
        absorption (i.e., *760-776 nm*) and water absorption
        (i.e., *813-827 nm*) regions.

        >>> hsbatch.spectral_clip(base_dir=base_dir, folder_name='spec_clip',
        ...                       wl_bands=[[0, 420], [760, 776], [813, 827], [880, 1000]],
        ...                       out_force=True)
        Processing 40 files. If this is not what is expected, please check if files have already undergone processing. If existing files should be overwritten, be sure to set the ``out_force`` parameter.
        Processing file 39/40: 100%|██████████| 40/40 [00:01<00:00, 26.68it/s]

        Use ``seaborn`` to visualize the spectra of a single pixel in one
        of the processed images.

        >>> import seaborn as sns
        >>> fname = os.path.join(base_dir, 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-spatial-crop.bip')
        >>> hsbatch.io.read_cube(fname)
        >>> spy_mem = hsbatch.io.spyfile.open_memmap()  # datacube before clipping
        >>> meta_bands = list(hsbatch.io.tools.meta_bands.values())
        >>> fname = os.path.join(base_dir, 'spec_clip', 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-spec-clip.bip')
        >>> hsbatch.io.read_cube(fname)
        >>> spy_mem_clip = hsbatch.io.spyfile.open_memmap()  # datacube after clipping
        >>> meta_bands_clip = list(hsbatch.io.tools.meta_bands.values())
        >>> ax = sns.lineplot(x=meta_bands, y=spy_mem[26][29], label='Before spectral clipping', linewidth=3)
        >>> ax = sns.lineplot(x=meta_bands_clip, y=spy_mem_clip[26][29], label='After spectral clipping', ax=ax)
        >>> ax.set_xlabel('Wavelength (nm)', weight='bold')
        >>> ax.set_ylabel('Reflectance (%)', weight='bold')
        >>> ax.set_title(r'API Example: `batch.spectral_clip`', weight='bold')

        .. image:: img/batch/spectral_clip_plot.png

        Notice the spectral areas that were clipped, namely the oxygen and
        water absorption regions (~770 and ~820 nm, respectively). There
        is perhaps a lower *signal:noise* ratio in these regions, which was
        the merit for clipping those bands out.

    .. _batch.spatial_crop: hs_process.batch.html#hs_process.batch.spatial_crop
    '''
    # Build the default band groups per call. A list literal in the
    # signature is created once and shared by every call, so a change made
    # to it downstream would leak into later calls.
    if wl_bands is None:
        wl_bands = [[0, 420], [760, 776], [813, 827], [880, 1000]]
    self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                            out_byteorder)
    if fname_list is None:
        if base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate which '
                   'datacubes should be processed.\n')
            assert base_dir is not None, msg
        fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
    if self.io.defaults.envi_write.force is False:  # otherwise just overwrites if it exists
        fname_list = self._check_processed(fname_list, base_dir_out,
                                           folder_name, name_append)
    self._execute_spec_clip(fname_list, base_dir_out, folder_name,
                            name_append, wl_bands)
[docs] def spectral_mimic(
self, fname_list=None, base_dir=None, search_ext='bip',
dir_level=0, base_dir_out=None, folder_name='spec_mimic',
name_append='spec-mimic', sensor='sentinel-2a',
df_band_response=None, col_wl='wl_nm', center_wl='peak',
out_dtype=False, out_force=None, out_ext=False,
out_interleave=False, out_byteorder=False):
'''
Batch processing tool to spectrally mimic a multispectral sensor for
multiple datacubes in the same way.
Parameters:
fname_list (``list``, optional): list of filenames to process; if
left to ``None``, will look at ``base_dir``, ``search_ext``,
and ``dir_level`` parameters for files to process (default:
``None``).
base_dir (``str``, optional): directory path to search for files to
spectrally resample; if ``fname_list`` is not ``None``,
``base_dir`` will be ignored (default: ``None``).
search_ext (``str``): file format/extension to search for in all
directories and subdirectories to determine which files to
process; if ``fname_list`` is not ``None``, ``search_ext`` will
be ignored (default: 'bip').
dir_level (``int``): The number of directory levels to search; if
``None``, searches all directory levels (default: 0).
base_dir_out (``str``): directory path to save all processed
datacubes; if set to ``None``, a folder named according to the
``folder_name`` parameter is added to ``base_dir``
folder_name (``str``): folder to add to ``base_dir_out`` to save
all the processed datacubes (default: 'spec_bin').
name_append (``str``): name to append to the filename (default:
'spec-bin').
sensor (``str``): Should be one of
["sentera_6x", "micasense_rededge_3", "sentinel-2a",
"sentinel-2b", "custom"]; if "custom", ``df_band_response``
and ``col_wl`` must be passed.
df_band_response (``pd.DataFrame``): A DataFrame that contains the
transmissivity (%) for each sensor band (as columns) mapped to
the continuous wavelength values (as rows). Required if
``sensor`` is "custom", ignored otherwise.
col_wl (``str``): The column of ``df_band_response`` denoting the
wavlengths (default: 'wl_nm').
center_wl (``str``): Indicates how the center wavelength of each
band is determined. If ``center_wl`` is "peak", the point at
which transmissivity is at its maximum is used as the center
wavelength. If ``center_wl`` is "weighted", the weighted
average is used to compute the center wavelength. Must be one
of ["peak", "weighted"] (``default: "peak"``).
out_XXX: Settings for saving the output files can be adjusted here
if desired. They are stored in ``batch.io.defaults``, and are
therefore accessible at a high level. See
``hsio.set_io_defaults()`` for more information on each of the
settings.
Note:
The following ``batch`` example builds on the API example results
of the `batch.spatial_crop`_ function. Please complete the
`batch.spatial_crop`_ example to be sure your directory
(i.e., ``base_dir``) is populated with multiple hyperspectral
datacubes. The following example will be using datacubes located in
the following directory:
``F:\\nigo0024\Documents\hs_process_demo\spatial_crop``
Example:
Load and initialize the ``batch`` module, checking to be sure the
directory exists.
>>> import os
>>> from hs_process import batch
>>> base_dir = r'F:\\nigo0024\Documents\hs_process_demo\spatial_crop'
>>> print(os.path.isdir(base_dir))
True
>>> hsbatch = batch(base_dir, search_ext='.bip', progress_bar=True) # searches for all files in ``base_dir`` with a ".bip" file extension
Use ``batch.spectral_mimic`` to spectrally mimic the Sentinel-2A
multispectral satellite sensor.
>>> hsbatch.spectral_mimic(
base_dir=base_dir, folder_name='spec_mimic',
name_append='sentinel-2a',
sensor='sentinel-2a', center_wl='weighted')
Processing 40 files. If existing files should be overwritten, be sure to set the ``out_force`` parameter.
Processing file 39/40: 100%|██████████| 40/40 [00:04<00:00, 8.85it/s]
Use ``seaborn`` to visualize the spectra of a single pixel in one
of the processed images.
>>> import seaborn as sns
>>> fname = os.path.join(base_dir, 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-spatial-crop.bip')
>>> hsbatch.io.read_cube(fname)
>>> spy_mem = hsbatch.io.spyfile.open_memmap() # datacube before mimicking
>>> meta_bands = list(hsbatch.io.tools.meta_bands.values())
>>> fname = os.path.join(base_dir, 'spec_mimic', 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-sentinel-2a.bip')
>>> hsbatch.io.read_cube(fname)
>>> spy_mem_sen2a = hsbatch.io.spyfile.open_memmap() # datacube after mimicking
>>> meta_bands_sen2a = list(hsbatch.io.tools.meta_bands.values())
>>> ax = sns.lineplot(x=meta_bands, y=spy_mem[26][29], label='Hyperspectral (Pika II)', linewidth=3)
>>> ax = sns.lineplot(x=meta_bands_sen2a, y=spy_mem_sen2a[26][29], label='Sentinel-2A "mimic"', marker='o', ms=6, ax=ax)
>>> ax.set_xlabel('Wavelength (nm)', weight='bold')
>>> ax.set_ylabel('Reflectance (%)', weight='bold')
>>> ax.set_title(r'API Example: `batch.spectral_mimic`', weight='bold')
.. image:: img/batch/spectral_mimic_sentinel-2a_plot.png
.. _batch.spatial_crop: hs_process.batch.html#hs_process.batch.spatial_crop
'''
self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
out_byteorder)
if fname_list is None and base_dir is not None:
fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
elif fname_list is None and base_dir is None:
# base_dir may have been stored to the ``batch`` object
base_dir = self.base_dir
msg = ('Please set ``fname_list`` or ``base_dir`` to indicate '
'which datacubes should be processed.\n')
assert base_dir is not None, msg
fname_list = self._recurs_dir(base_dir, search_ext, dir_level)
if self.io.defaults.envi_write.force is False: # otherwise just overwrites if it exists
fname_list = self._check_processed(fname_list, base_dir_out,
folder_name, name_append)
self._execute_spec_mimic(fname_list, base_dir_out, folder_name,
name_append, sensor, df_band_response,
col_wl, center_wl)
def spectral_resample(
        self, fname_list=None, base_dir=None, search_ext='bip',
        dir_level=0, base_dir_out=None, folder_name='spec_bin',
        name_append='spec-bin', bandwidth=None, bins_n=None,
        out_dtype=False, out_force=None, out_ext=False,
        out_interleave=False, out_byteorder=False):
    r'''
    Batch processing tool to spectrally resample (a.k.a. "bin") multiple
    datacubes in the same way.

    Parameters:
        fname_list (``list``, optional): list of filenames to process; if
            left to ``None``, will look at ``base_dir``, ``search_ext``,
            and ``dir_level`` parameters for files to process (default:
            ``None``).
        base_dir (``str``, optional): directory path to search for files to
            spectrally resample; if ``fname_list`` is not ``None``,
            ``base_dir`` will be ignored; if both are ``None``, the
            ``base_dir`` stored on the ``batch`` object is used (default:
            ``None``).
        search_ext (``str``): file format/extension to search for in all
            directories and subdirectories to determine which files to
            process; if ``fname_list`` is not ``None``, ``search_ext`` will
            be ignored (default: 'bip').
        dir_level (``int``): The number of directory levels to search; if
            ``None``, searches all directory levels (default: 0).
        base_dir_out (``str``): directory path to save all processed
            datacubes; if set to ``None``, a folder named according to the
            ``folder_name`` parameter is added to ``base_dir`` (default:
            ``None``).
        folder_name (``str``): folder to add to ``base_dir_out`` to save
            all the processed datacubes (default: 'spec_bin').
        name_append (``str``): name to append to the filename (default:
            'spec-bin').
        bandwidth (``float`` or ``int``): The bandwidth of the bands
            after spectral resampling is complete (units should be
            consistent with that of the .hdr file). Setting ``bandwidth``
            to 10 will consolidate bands that fall within every 10 nm
            interval (default: ``None``).
        bins_n (``int``): The number of bins (i.e., "bands") to achieve
            after spectral resampling is complete. Ignored if ``bandwidth``
            is not ``None`` (default: ``None``).
        out_XXX: Settings for saving the output files can be adjusted here
            if desired. They are stored in ``batch.io.defaults``, and are
            therefore accessible at a high level. See
            ``hsio.set_io_defaults()`` for more information on each of the
            settings.

    Raises:
        AssertionError: if ``fname_list`` is ``None`` and no ``base_dir``
            is available (neither passed in nor stored on the ``batch``
            object).

    Note:
        The following ``batch`` example builds on the API example results
        of the `batch.spatial_crop`_ function. Please complete the
        `batch.spatial_crop`_ example to be sure your directory
        (i.e., ``base_dir``) is populated with multiple hyperspectral
        datacubes. The following example will be using datacubes located in
        the following directory:
        ``F:\nigo0024\Documents\hs_process_demo\spatial_crop``

    Example:
        Load and initialize the ``batch`` module, checking to be sure the
        directory exists.

        >>> import os
        >>> from hs_process import batch
        >>> base_dir = r'F:\nigo0024\Documents\hs_process_demo\spatial_crop'
        >>> print(os.path.isdir(base_dir))
        True
        >>> hsbatch = batch(base_dir, search_ext='.bip', progress_bar=True)  # searches for all files in ``base_dir`` with a ".bip" file extension

        Use ``batch.spectral_resample`` to bin ("group") all spectral bands
        into 20 nm bandwidth bands (from ~2.3 nm bandwidth originally) on
        a per-pixel basis.

        >>> hsbatch.spectral_resample(
                base_dir=base_dir, folder_name='spec_bin',
                name_append='spec-bin-20', bandwidth=20)
        Processing 40 files. If existing files should be overwritten, be sure to set the ``out_force`` parameter.
        Processing file 39/40: 100%|██████████| 40/40 [00:00<00:00, 48.31it/s]
        ...

        Use ``seaborn`` to visualize the spectra of a single pixel in one
        of the processed images.

        >>> import seaborn as sns
        >>> fname = os.path.join(base_dir, 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-spatial-crop.bip')
        >>> hsbatch.io.read_cube(fname)
        >>> spy_mem = hsbatch.io.spyfile.open_memmap()  # datacube before resampling
        >>> meta_bands = list(hsbatch.io.tools.meta_bands.values())
        >>> fname = os.path.join(base_dir, 'spec_bin', 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-spec-bin-20.bip')
        >>> hsbatch.io.read_cube(fname)
        >>> spy_mem_bin = hsbatch.io.spyfile.open_memmap()  # datacube after resampling
        >>> meta_bands_bin = list(hsbatch.io.tools.meta_bands.values())
        >>> ax = sns.lineplot(x=meta_bands, y=spy_mem[26][29], label='Hyperspectral (Pika II)', linewidth=3)
        >>> ax = sns.lineplot(x=meta_bands_bin, y=spy_mem_bin[26][29], label='Spectral resample (20 nm)', marker='o', ms=6, ax=ax)
        >>> ax.set_xlabel('Wavelength (nm)', weight='bold')
        >>> ax.set_ylabel('Reflectance (%)', weight='bold')
        >>> ax.set_title(r'API Example: `batch.spectral_resample`', weight='bold')

        .. image:: img/batch/spectral_resample-20nm_plot.png

    .. _batch.spatial_crop: hs_process.batch.html#hs_process.batch.spatial_crop
    '''
    # Push the requested output settings onto the shared io defaults first;
    # the ``force`` flag read further down comes from those defaults.
    self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                            out_byteorder)

    if fname_list is None:
        if base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
        if base_dir is None:
            # Raised explicitly (rather than via ``assert``) so the check
            # is not stripped when Python runs with ``-O``.
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate '
                   'which datacubes should be processed.\n')
            raise AssertionError(msg)
        fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

    if self.io.defaults.envi_write.force is False:  # otherwise just overwrites if it exists
        fname_list = self._check_processed(fname_list, base_dir_out,
                                           folder_name, name_append)

    self._execute_spec_resample(fname_list, base_dir_out, folder_name,
                                name_append, bandwidth, bins_n)
def spectral_smooth(self, fname_list=None, base_dir=None, search_ext='bip',
                    dir_level=0, base_dir_out=None,
                    folder_name='spec_smooth', name_append='spec-smooth',
                    window_size=11, order=2, stats=False,
                    out_dtype=False, out_force=None, out_ext=False,
                    out_interleave=False, out_byteorder=False):
    r'''
    Batch processing tool to spectrally smooth multiple datacubes in the
    same way.

    Parameters:
        fname_list (``list``, optional): list of filenames to process; if
            left to ``None``, will look at ``base_dir``, ``search_ext``, and
            ``dir_level`` parameters for files to process (default:
            ``None``).
        base_dir (``str``, optional): directory path to search for files to
            spectrally smooth; if ``fname_list`` is not ``None``,
            ``base_dir`` will be ignored; if both are ``None``, the
            ``base_dir`` stored on the ``batch`` object is used (default:
            ``None``).
        search_ext (``str``): file format/extension to search for in all
            directories and subdirectories to determine which files to
            process; if ``fname_list`` is not ``None``, ``search_ext`` will
            be ignored (default: 'bip').
        dir_level (``int``): The number of directory levels to search; if
            ``None``, searches all directory levels (default: 0).
        base_dir_out (``str``): directory path to save all processed
            datacubes; if set to ``None``, a folder named according to the
            ``folder_name`` parameter is added to ``base_dir`` (default:
            ``None``).
        folder_name (``str``): folder to add to ``base_dir_out`` to save all
            the processed datacubes (default: 'spec_smooth').
        name_append (``str``): name to append to the filename (default:
            'spec-smooth').
        window_size (``int``): the length of the window; must be an odd
            integer number (default: 11).
        order (``int``): the order of the polynomial used in the filtering;
            must be less than ``window_size`` - 1 (default: 2).
        stats (``bool``): whether to compute some basic descriptive
            statistics (mean, st. dev., and coefficient of variation) of
            the smoothed data array (default: ``False``). The flag is
            forwarded to ``_execute_spec_smooth``; this method itself has
            no return statement.
        out_XXX: Settings for saving the output files can be adjusted here
            if desired. They are stored in ``batch.io.defaults``, and are
            therefore accessible at a high level. See
            ``hsio.set_io_defaults()`` for more information on each of the
            settings.

    Raises:
        AssertionError: if ``fname_list`` is ``None`` and no ``base_dir``
            is available (neither passed in nor stored on the ``batch``
            object).

    Note:
        The following ``batch`` example builds on the API example results
        of the `batch.spatial_crop`_ function. Please complete the
        `batch.spatial_crop`_ example to be sure your directory
        (i.e., ``base_dir``) is populated with multiple hyperspectral
        datacubes. The following example will be using datacubes located in
        the following directory:
        ``F:\nigo0024\Documents\hs_process_demo\spatial_crop``

    Example:
        Load and initialize the ``batch`` module, checking to be sure the
        directory exists.

        >>> import os
        >>> from hs_process import batch
        >>> base_dir = r'F:\nigo0024\Documents\hs_process_demo\spatial_crop'
        >>> print(os.path.isdir(base_dir))
        True
        >>> hsbatch = batch(base_dir, search_ext='.bip')  # searches for all files in ``base_dir`` with a ".bip" file extension

        Use ``batch.spectral_smooth`` to perform a *Savitzky-Golay*
        smoothing operation on each image/pixel in ``base_dir``. The
        ``window_size`` and ``order`` can be adjusted to achieve desired
        smoothing results.

        >>> hsbatch.spectral_smooth(base_dir=base_dir, folder_name='spec_smooth',
                                    window_size=11, order=2)
        Processing 40 files. If this is not what is expected, please check if files have already undergone processing. If existing files should be overwritten, be sure to set the ``out_force`` parameter.
        Spectrally smoothing: F:\nigo0024\Documents\hs_process_demo\spatial_crop\Wells_rep2_20180628_16h56m_pika_gige_7_1011-spatial-crop.bip
        Saving F:\nigo0024\Documents\hs_process_demo\spatial_crop\spec_smooth\Wells_rep2_20180628_16h56m_pika_gige_7_1011-spec-smooth.bip
        Spectrally smoothing: F:\nigo0024\Documents\hs_process_demo\spatial_crop\Wells_rep2_20180628_16h56m_pika_gige_7_1012-spatial-crop.bip
        Saving F:\nigo0024\Documents\hs_process_demo\spatial_crop\spec_smooth\Wells_rep2_20180628_16h56m_pika_gige_7_1012-spec-smooth.bip
        ...

        Use ``seaborn`` to visualize the spectra of a single pixel in one
        of the processed images.

        >>> import seaborn as sns
        >>> fname = os.path.join(base_dir, 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-spatial-crop.bip')
        >>> hsbatch.io.read_cube(fname)
        >>> spy_mem = hsbatch.io.spyfile.open_memmap()  # datacube before smoothing
        >>> meta_bands = list(hsbatch.io.tools.meta_bands.values())
        >>> fname = os.path.join(base_dir, 'spec_smooth', 'Wells_rep2_20180628_16h56m_pika_gige_7_1011-spec-smooth.bip')
        >>> hsbatch.io.read_cube(fname)
        >>> spy_mem_smooth = hsbatch.io.spyfile.open_memmap()  # datacube after smoothing
        >>> meta_bands_smooth = list(hsbatch.io.tools.meta_bands.values())
        >>> ax = sns.lineplot(x=meta_bands, y=spy_mem[26][29], label='Before spectral smoothing', linewidth=3)
        >>> ax = sns.lineplot(x=meta_bands_smooth, y=spy_mem_smooth[26][29], label='After spectral smoothing', ax=ax)
        >>> ax.set_xlabel('Wavelength (nm)', weight='bold')
        >>> ax.set_ylabel('Reflectance (%)', weight='bold')
        >>> ax.set_title(r'API Example: `batch.spectral_smooth`', weight='bold')

        .. image:: img/batch/spectral_smooth_plot.png

        Notice how the *"choppiness"* of the spectral curve is lessened
        after the smoothing operation. There are spectral regions that
        perhaps had a lower *signal:noise* ratio and did not do particularly
        well at smoothing (i.e., < 410 nm, ~770 nm, and ~820 nm). It may be
        wise to perform ``batch.spectral_smooth`` *after*
        `batch.spectral_clip`_.

    .. _batch.spatial_crop: hs_process.batch.html#hs_process.batch.spatial_crop
    .. _batch.spectral_clip: hs_process.batch.html#hs_process.batch.spectral_clip
    '''
    # Push the requested output settings onto the shared io defaults first;
    # the ``force`` flag read further down comes from those defaults.
    self.io.set_io_defaults(out_dtype, out_force, out_ext, out_interleave,
                            out_byteorder)

    if fname_list is None:
        if base_dir is None:
            # base_dir may have been stored to the ``batch`` object
            base_dir = self.base_dir
        if base_dir is None:
            # Raised explicitly (rather than via ``assert``) so the check
            # is not stripped when Python runs with ``-O``.
            msg = ('Please set ``fname_list`` or ``base_dir`` to indicate which '
                   'datacubes should be processed.\n')
            raise AssertionError(msg)
        fname_list = self._recurs_dir(base_dir, search_ext, dir_level)

    if self.io.defaults.envi_write.force is False:  # otherwise just overwrites if it exists
        fname_list = self._check_processed(fname_list, base_dir_out,
                                           folder_name, name_append)

    self._execute_spec_smooth(
        fname_list, base_dir_out, folder_name, name_append,
        window_size, order, stats)
# if df_stats is not None:
# return df_stats
# # Parallel threading
# with ThreadPoolExecutor(max_workers=None) as executor: # defaults to min(32, os.cpu_count() + 4)
# future_to_clip = {
# executor.submit(self._execute_spec_smooth_pp,
# fname,
# base_dir_out, folder_name, name_append,
# window_size, order, stats): fname for fname in fname_list}
# df_stats = None
# if stats == True:
# for future in as_completed(future_to_clip):
# smooth = future_to_clip[future]
# try:
# df_smooth_temp = future.result()
# if df_stats is None:
# df_stats = df_smooth_temp.copy()
# else:
# df_stats = df_stats.append(df_smooth_temp, ignore_index=True)
# except Exception as exc:
# print('%r generated an exception: %s' % (smooth, exc))
# else:
# print('%r page is %d bytes' % (smooth, len(data)))
# base_dir = os.path.dirname(fname_list[0])
# if base_dir_out is None:
# dir_out, name_append = self._save_file_setup(
# base_dir, folder_name, name_append)
# else:
# dir_out, name_append = self._save_file_setup(
# base_dir_out, folder_name, name_append)
# fname_stats = os.path.join(dir_out, name_append[1:] + '-stats.csv')
# if os.path.isfile(fname_stats) and self.io.defaults.envi_write.force is False:
# df_stats_in = pd.read_csv(fname_stats)
# df_smooth_stats = df_stats_in.append(df_stats)
# df_smooth_stats.to_csv(fname_stats)
# return df_smooth_stats