Tomás Rojas
02/27/2023, 7:25 PMCustomDataSetPartitionedDataSetclass LedExperiment(AbstractDataSet):
    def __init__(self, filepath: str):
        breakpoint()
        self.path = filepath
        self.files = glob.glob(os.path.join(filepath, "*"))
        self.files.sort()
        self.gate_voltage = self.get_gate_voltage(self.path)
        self.info_path, self.voltages_path, self.data_path = self.files
    @staticmethod
    def get_gate_voltage(path: str) -> float:
        """
        This is a function that is able to get the gate voltage from the folder name
        that is the root of the data
        :param path: path of the data, ussualy but not restricted to self.path
        :return: the voltage from the Dirac Point used as gate voltage
        """
        # note: sometimes there is more than one measurement for one voltage from the DP, it should
        # be always separed by an underscore "_".
        breakpoint()
        folder_name = os.path.split(path)[-1]
        gate_voltage = float(folder_name)
        return gate_voltage
    @staticmethod
    def get_info(path: str, gate_voltage: float) -> pd.DataFrame:
        """
        This method takes a path to the info file and returns a pandas
        datatrame of one row and the info in each column
        :param path: path to the info file of the experiment
        :param gate_voltage: this is the gate voltage with respect to the Dirac Point
        :return: a pandas dataframe with the parsed information
        """
        with open(path, "r") as f:
            r = f.read()
        r = r.split("\n")[1:-2]
        r = [i.split(",") for i in r]
        r = [item for sublist in r for item in sublist]
        r = [i.replace(" ", "") for i in r]
        r = {i.split("=")[0]: i.split("=")[1] for i in r}
        r["Vmin"] = float(r["Vmin"][:-1])
        r["Vmax"] = float(r["Vmax"][:-1])
        r["Vstep"] = float(r["Vstep"][:-1])
        r["Cycles"] = int(r["Cycles"])
        r["waitingtime"] = float(r["waitingtime"][:-1])
        r["timeatlight"] = float(r["timeatlight"][:-1])
        r["timeatdark"] = float(r["timeatdark"][:-1])
        r["wavelength"] = float(r["wavelength"][:-2])
        r["gate_voltage"] = gate_voltage
        info = pd.DataFrame(r, index=["value"])
        return info
    @staticmethod
    def get_led_voltage_list(voltage_list_path: str) -> pd.DataFrame:
        """
        This funtion takes the path to the file containing the list of the voltages to the led driver
        and returns a pandas dataframe containing all the voltages in the order they appear in the file
        which is the same order as they were used.
        :param voltage_list_path: path to the file containing the voltage list.
        :return: a pandas dataframe with all the information.
        """
        with open(voltage_list_path, "r") as f:
            r = f.read()
        r = r.split("\n")[:-1][::2]
        voltages = [float(i) for i in r]
        voltages = pd.DataFrame(voltages, columns=["LED driver voltages"])
        return voltages
    @staticmethod
    def get_data(data_path: str) -> pd.DataFrame:
        """
        This function reads the data from the experiment
        :param data_path: path to the file containing the time series data
        :return: a pandas dataframe with the time series data of the currents
        """
        return pd.read_csv(data_path, sep="\t")
    def _load(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """
        This function loads the data using the information provided in the init
        :return: A tuple with the information, LED voltages and data DataFrames in
        that order.
        """
        breakpoint()
        info = self.get_info(self.info_path, self.gate_voltage)
        led_voltages = self.get_led_voltage_list(self.voltages_path)
        data = self.get_data(self.data_path)
        return info, led_voltages, data
    def _save(self, data) -> None:
        # TODO: finish saving method
        pass
    def _describe(self) -> Dict[str, Any]:
        """
        Describe the dataset through the attributes that identify it.
        :return: a dict with the info, voltages and data file paths plus the
        gate voltage of the experiment.
        """
        return dict(
            information_path=self.info_path,
            voltages_path=self.voltages_path,
            data_path=self.data_path,
            gate_voltage=self.gate_voltage  # measured with respect to the Dirac Point (DP)
        )Tomás Rojas
02/27/2023, 7:32 PMTomás Rojas
02/27/2023, 7:33 PMTomás Rojas
02/27/2023, 7:35 PMprueba:
  type: responsivity.extras.datasets.led_experiment_dataset.LedExperiment
  filepath: data/01_raw/UB1C/385/-0.7
prueba_partitioned:
  type: "PartitionedDataSet"
  path: "data/01_raw/UB1C/385"
  dataset: "responsivity.extras.datasets.led_experiment_dataset.LedExperiment"Nok Lam Chan
02/28/2023, 7:37 AM"But when I use the Partitioned Data Set the path gets all messed up." How exactly is it messed up? What is the resolved path when you use PartitionedDataSet?
Tomás Rojas
02/28/2023, 5:49 PMTomás Rojas
02/28/2023, 5:50 PMTomás Rojas
02/28/2023, 5:53 PMdata/
├─ 01_raw/
   ├─ device1/
      ├─ experiment1/
      │  ├─ file1.txt
      │  ├─ file2.txt
      ├─ experiment2/
         ├─ file1.txt
         ├─ file2.txtTomás Rojas
02/28/2023, 5:53 PMTomás Rojas
02/28/2023, 5:54 PMpruebaTomás Rojas
02/28/2023, 5:55 PMTomás Rojas
02/28/2023, 5:55 PMTomás Rojas
02/28/2023, 5:55 PMTomás Rojas
02/28/2023, 5:56 PMTomás Rojas
02/28/2023, 5:56 PMTomás Rojas
02/28/2023, 5:56 PMTomás Rojas
02/28/2023, 5:56 PMTomás Rojas
02/28/2023, 5:57 PMTomás Rojas
02/28/2023, 5:57 PMTomás Rojas
02/28/2023, 5:57 PMNok Lam Chan
03/01/2023, 5:30 AM`01_raw/device1/experiment1/file1.txt` `01_raw/device1/experiment1`Tomás Rojas
03/01/2023, 9:45 PMNok Lam Chan
03/02/2023, 4:40 AMTomás Rojas
03/02/2023, 6:21 AMNok Lam Chan
03/03/2023, 6:06 PMNok Lam Chan
03/03/2023, 6:09 PMPartitionedDataSetfindcatalog.ymlwithdirs=1maxdepth=1Nok Lam Chan
03/03/2023, 6:25 PMdef _list_partitions(self) -> List[str]:
        # Ask the filesystem for every path found under the normalized root,
        # forwarding the configured load_args as keyword arguments to ``find``,
        # and keep only the paths that end with the configured filename suffix.
        return [
            path
            for path in self._filesystem.find(self._normalized_path, **self._load_args)
            if path.endswith(self._filename_suffix)
        ]partitionfindTomás Rojas
03/03/2023, 7:52 PMDataSetError: No partitions found in 'data/01_raw/UB1C'Tomás Rojas
03/03/2023, 7:53 PMprueba_partitioned:
  type: "PartitionedDataSet"
  path: "data/01_raw/UB1C"
  dataset: "responsivity.extras.datasets.led_experiment_dataset.LedExperiment"
  load_args:
    with_dirs: 1
    maxdepth: 1Nok Lam Chan
03/03/2023, 8:03 PMNok Lam Chan
03/03/2023, 8:03 PMTomás Rojas
03/03/2023, 8:13 PMprueba_partitioned:
  type: "PartitionedDataSet"
  path: "data/01_raw/UB1C/405"
  dataset: "responsivity.extras.datasets.led_experiment_dataset.LedExperiment"
  load_args:
    with_dirs: True
    maxdepth: 1Tomás Rojas
03/03/2023, 8:13 PMNok Lam Chan
03/04/2023, 11:52 AMTomás Rojas
03/06/2023, 11:29 PMTomás Rojas
03/06/2023, 11:31 PMTomás Rojas
03/06/2023, 11:31 PMNok Lam Chan
03/07/2023, 10:14 AMNok Lam Chan
03/07/2023, 10:14 AMTomás Rojas
03/07/2023, 10:31 PMclass LedExperimentZip(AbstractDataSet):
    def __init__(self, filepath: str):
        self.path = filepath
        self.gate_voltage = float(os.path.split(filepath)[1][:-4])
        self.zip_file = None
        self.zip_files_dict = None
        self.info_str, self.voltages_str, self.data_str = None, None, None
    @staticmethod
    def get_info(r: str, gate_voltage: float) -> pd.DataFrame:
        """
        This method takes a path to the info file and returns a pandas
        datatrame of one row and the info in each column
        :param r: string containing the information of the file
        :param gate_voltage: this is the gate voltage with respect to the Dirac Point
        :return: a pandas dataframe with the parsed information
        """
        r = r.split("\r\n")[1:-2]
        r = [i.split(",") for i in r]
        r = [item for sublist in r for item in sublist]
        r = [i.replace(" ", "") for i in r]
        r = {i.split("=")[0]: i.split("=")[1] for i in r}
        r["Vmin"] = float(r["Vmin"][:-1])
        r["Vmax"] = float(r["Vmax"][:-1])
        r["Vstep"] = float(r["Vstep"][:-1])
        r["Cycles"] = int(r["Cycles"])
        r["waiting_time"] = float(r["waitingtime"][:-1])
        r["time_at_light"] = float(r["timeatlight"][:-1])
        r["time_at_dark"] = float(r["timeatdark"][:-1])
        r["wavelength"] = float(r["wavelength"][:-2])
        r["gate_voltage"] = gate_voltage
        info = pd.DataFrame(r, index=["value"])
        return info
    @staticmethod
    def get_data(data_string: str) -> pd.DataFrame:
        """
        This function reads the data from the experiment
        :param data_string: data as a string utf-8
        :return: a pandas dataframe with the time series data of the currents
        """
        return pd.read_csv(StringIO(data_string), sep="\t")
    @staticmethod
    def get_led_voltage_list(r: str) -> pd.DataFrame:
        """
        This funtion takes the path to the file containing the list of the voltages to the led driver
        and returns a pandas dataframe containing all the voltages in the order they appear in the file
        which is the same order as they were used.
        :param r: Voltage list information as string
        :return: a pandas dataframe with all the information.
        """
        r = r.split("\n")[:-1][::2]
        voltages = [float(i) for i in r]
        voltages = pd.DataFrame(voltages, columns=["LED driver voltages"])
        return voltages
    def _load(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """
        This function loads the data using the information provided in the init
        :return: A tuple with the information, LED voltages and data DataFrames in
        that order.
        """
        self.zip_file = ZipFile(self.path)
        self.zip_files_dict = {name: self.zip_file.read(name) for name in self.zip_file.namelist()}
        names = [name for name in self.zip_files_dict]
        names.sort()
        info_str, voltages_str, data_str = [str(self.zip_files_dict[name], 'utf-8') for name in names]
        info = self.get_info(info_str, self.gate_voltage)
        led_voltages = self.get_led_voltage_list(voltages_str)
        data = self.get_data(data_str)
        return info, led_voltages, data
    def _save(self, data) -> None:
        # TODO: finish saving method
        pass
    def _describe(self) -> Dict[str, Any]:
        """
        Describe the dataset through the attributes that identify it.
        :return: a dict with the path of the zip file and the gate voltage of
        the experiment.
        """
        return dict(
            file_path=self.path,
            gate_voltage=self.gate_voltage  # measured with respect to the Dirac Point (DP)
        )Nok Lam Chan
03/08/2023, 11:26 AMPartitionedDataSetboundpandas.dataframe# This should be your node logic
# PartitionedDataSet hands the node a dict of {partition_id: load_callable};
# iterating the dict directly would yield the string ids, which cannot be
# called, so go through the values. Any other iterable of callables is used as is.
if isinstance(partitioned_datasets, dict):
    partition_loaders = partitioned_datasets.values()
else:
    partition_loaders = partitioned_datasets
# each loader is only executed here, so partitions are read lazily one by one
data = [load_partition() for load_partition in partition_loaders]
data = pd.concat(data)
# Now data is a large dataframe that contains all the partitionsTomás Rojas
06/02/2023, 6:21 AM