# Source code for ArrayViewer.Data

"""
Data Loader for the ArrayViewer.
"""
# Author: Alex Schwarz <alex.schwarz@informatik.tu-chemnitz.de>
try:
    import cPickle as pickle
except ImportError:
    import pickle

import os
import re
import scipy.io
import h5py
from h5py._hl import files, group, dataset
from PyQt5.QtCore import QObject, pyqtSignal, pyqtSlot
from PIL import Image
import numpy as np

class Loader(QObject):
    """
    Separate Loader object that reads data files for the ArrayViewer.

    Loading is requested by emitting the ``load`` signal, which is connected
    to ``_add_data``. The result is reported through ``doneLoading``;
    status and warning texts are reported through ``infoMsg``.
    """
    # Emitted with (validated data dict, key) once a file has been loaded.
    doneLoading = pyqtSignal(dict, str)
    # Request to load a file: (file name, key, switch_to_last).
    load = pyqtSignal(str, str, bool)
    # Status message: (text, integer code passed through to the receiver).
    infoMsg = pyqtSignal(str, int)

    def __init__(self, parent=None):
        """ Initialize the Loader and connect ``load`` to ``_add_data``. """
        super(Loader, self).__init__(parent)
        self.fname = ''
        self.switch_to_last = False
        self.load.connect(self._add_data)

    @staticmethod
    def _mat_struct_type():
        """ Return scipy's MATLAB struct class for the installed scipy. """
        try:
            # Public location in recent scipy releases.
            return scipy.io.matlab.mat_struct
        except AttributeError:
            # Older scipy only exposes it in the (now deprecated) submodule.
            return scipy.io.matlab.mio5_params.mat_struct

    def _validate(self, data):
        """
        Recursively convert loaded data into dicts and numpy arrays.

        Dicts and HDF5 files/groups become dicts of validated values, lists
        of numbers become ``np.ndarray``, MATLAB structs become dicts and
        object arrays are unpacked element by element. Values of an
        unsupported type are replaced by ``None`` after an ``infoMsg``.

        Returns the validated data. Arrays and datasets with more than one
        dimension get their first axis moved to the end when
        ``self.switch_to_last`` is set.
        """
        if isinstance(data, dict):
            # Run the validation again for each subelement in the dict and
            # drop keys starting with "__" (e.g. loadmat's "__header__").
            data = {str(key): self._validate(data[key]) for key in data
                    if str(key)[:2] != "__"}
        elif isinstance(data, list):
            if data != [] and not isinstance(data[0], str):
                # Not all elements in the list have the same length:
                # pad the shorter ones with NaN.
                if isinstance(data[0], list) and len(set(map(len, data))) != 1:
                    maxlen = max(len(row) for row in data)
                    # NOTE(review): every padded row is wrapped in one more
                    # list, which adds a singleton axis to the resulting
                    # array. Kept as in the original - confirm it is wanted.
                    data = [[row + [np.nan] * (maxlen - len(row))]
                            for row in data]
                try:
                    arr = np.array(data)
                except ValueError:
                    # Ragged input that numpy refuses to turn into an array.
                    arr = None
                if arr is None or arr.dtype == "O":
                    # No regular numeric array possible: expose the list as
                    # a dict keyed by the element index instead.
                    data = self._validate(
                        {str(k): v for k, v in enumerate(data)})
                else:
                    data = arr
        elif isinstance(data, self._mat_struct_type()):
            # Create a dictionary from matlab structs. getattr replaces the
            # former exec() call, so field names are never run as code.
            data = {key: self._validate(getattr(data, key))
                    for key in data._fieldnames}
        elif isinstance(data, np.ndarray) and data.dtype == "O":
            # Create numpy arrays from matlab cell types
            if not data.shape:
                # 0-d object array: unwrap the single contained object.
                data = self._validate(data[()])
            else:
                data = self._validate([self._validate(sd) for sd in data])
        elif isinstance(data, (files.File, group.Group)):
            # "#refs#" only holds the targets of HDF5 object references.
            data = {key: self._validate(data[key]) for key in data
                    if key != "#refs#"}
        elif isinstance(data, dataset.Dataset) and data.dtype == "O":
            # Dataset of object references (presumably MATLAB v7.3 cell
            # arrays - TODO confirm): resolve every reference by name.
            dat = np.empty_like(data)
            for x, d in enumerate(data[()]):
                names = [h5py.h5r.get_name(sd, data.file.id) for sd in d]
                # uint16 targets are decoded as UTF-16 text, everything
                # else is stored as the referenced dataset itself.
                dat[x, :] = [np.array(data.file[name]).tobytes()
                             .decode(encoding="utf-16")
                             if data.file[name].dtype == "uint16" else
                             data.file[name] for name in names]
            try:
                data = dat.astype(str).squeeze().tolist()
            except ValueError:
                data = np.array([data.file.get(d[0]) for d in data[()]][0])
        elif not isinstance(data, (np.ndarray, dataset.Dataset, int, float,
                                   str, type(u''), tuple)):
            # The type must be converted to text before concatenation;
            # adding the type object itself raised a TypeError.
            self.infoMsg.emit("DataType (" + type(data).__name__ +
                              ") not recognized. Skipping", 0)
            data = None
        if isinstance(data, (np.ndarray, dataset.Dataset)) and \
                self.switch_to_last and len(data.shape) > 1:
            data = np.moveaxis(data, 0, -1)
        return data

    @pyqtSlot(str, str, bool)
    def _add_data(self, fname, key, switch_to_last=False):
        """
        Load the file ``fname`` and emit its content via ``doneLoading``.

        The loader is chosen by file extension (.hdf5, .mat, .npy, .data,
        .txt); every other file is opened as an image. ``key`` is passed
        through to ``doneLoading`` unchanged and ``switch_to_last`` is
        stored for use by ``_validate``.

        Returns True if data was loaded and emitted, False otherwise.
        """
        self.switch_to_last = switch_to_last
        # Files bigger than 15 GB are not loaded at all.
        if os.path.getsize(fname) > 15e9:
            self.infoMsg.emit("File bigger than 15GB. Not loading!", -1)
            self.doneLoading.emit({}, '')
            return False
        # Load the different data types
        if fname.endswith('.hdf5'):
            # The file stays open: non-object datasets are kept as lazy
            # h5py datasets by _validate.
            data = self._validate(h5py.File(str(fname), 'r'))
        elif fname.endswith('.mat'):
            try:
                # old matlab versions
                data = self._validate(scipy.io.loadmat(
                    str(fname), squeeze_me=True, struct_as_record=False))
            except NotImplementedError:
                # v7.3 files are HDF5 containers
                data = self._validate(h5py.File(str(fname), "r"))
        elif fname.endswith('.npy'):
            # SECURITY: allow_pickle=True runs pickle on the file content;
            # only open files from trusted sources.
            try:
                data = self._validate(np.load(str(fname), allow_pickle=True))
            except UnicodeDecodeError:
                data = self._validate(np.load(str(fname), allow_pickle=True,
                                              encoding='latin1'))
        elif fname.endswith('.data'):
            # SECURITY: unpickling can execute arbitrary code; only open
            # files from trusted sources.
            # Pickles are binary, so the file is opened once in 'rb' mode
            # and closed again by the with-statement.
            with open(str(fname), 'rb') as handle:
                try:
                    loaded = pickle.load(handle)
                except UnicodeDecodeError:
                    # Retry from the start with latin1 text decoding.
                    handle.seek(0)
                    loaded = pickle.load(handle, encoding='latin1')
            data = self._validate(loaded)
        elif fname.endswith('.txt'):
            with open(fname) as handle:
                lines = handle.readlines()
            # Every line contributes the list of numbers found in it.
            numberRegEx = r'([-+]?\d+\.?\d*(?:[eE][-+]\d+)?)'
            lil = [re.findall(numberRegEx, line) for line in lines]
            data = {'Value': np.array(lil, dtype=float)}
        else:
            try:
                with Image.open(fname) as img:
                    # Swap the first two axes of the image array.
                    data = {'Value': np.swapaxes(np.array(img), 0, 1)}
            except (OSError, FileNotFoundError):
                # NOTE(review): unlike the size check above, this path does
                # not emit doneLoading - confirm the receiver copes.
                print('File type not recognized!')
                return False
        if not isinstance(data, dict):
            data = {'Value': data}
        self.doneLoading.emit(data, key)
        return True