From 6cb4225a9c938afc3cdb81accdc836e33ac6ca2b Mon Sep 17 00:00:00 2001 From: janezd Date: Thu, 19 Sep 2024 17:41:44 +0200 Subject: [PATCH] File: Allow selecting a file with an arbitrary extension --- Orange/widgets/data/owfile.py | 87 +++++++++++++++++------- Orange/widgets/data/tests/test_owfile.py | 4 +- 2 files changed, 66 insertions(+), 25 deletions(-) diff --git a/Orange/widgets/data/owfile.py b/Orange/widgets/data/owfile.py index e23a249b3e5..4ba48d64c11 100644 --- a/Orange/widgets/data/owfile.py +++ b/Orange/widgets/data/owfile.py @@ -147,8 +147,11 @@ class Warning(widget.OWWidget.Warning): class Error(widget.OWWidget.Error): file_not_found = Msg("File not found.") missing_reader = Msg("Missing reader.") + select_file_type = Msg("Select file type.") sheet_error = Msg("Error listing available sheets.") unknown = Msg("Read error:\n{}") + unknown_select = Msg( + "Read error, possibly due to incorrect choice of file type:\n{}") UserAdviceMessages = [ widget.Message( @@ -264,7 +267,7 @@ def package(w): self.reader_combo = QComboBox(self) self.reader_combo.setSizePolicy(Policy.Expanding, Policy.Fixed) self.reader_combo.setMinimumSize(QSize(100, 1)) - self.reader_combo.activated[int].connect(self.select_reader) + self.reader_combo.activated[int].connect(self.on_reader_change) box.layout().addWidget(self.reader_combo) layout.addWidget(box, 0, 1) @@ -327,6 +330,10 @@ def select_sheet(self): self.recent_paths[0].sheet = self.sheet_combo.currentText() self.load_data() + def on_reader_change(self, n): + self.select_reader(n) + self.load_data() + def select_reader(self, n): if self.source != self.LOCAL_FILE: return # ignore for URL's @@ -335,14 +342,11 @@ def select_reader(self, n): path = self.recent_paths[0] if n == 0: # default path.file_format = None - self.load_data() elif n <= len(self.available_readers): reader = self.available_readers[n - 1] path.file_format = reader.qualified_name() - self.load_data() else: # the rest include just qualified names 
path.file_format = self.reader_combo.itemText(n) - self.load_data() def _url_set(self): index = self.url_combo.currentIndex() @@ -373,7 +377,9 @@ def browse_file(self, in_demos=False): else: start_file = self.last_path() or os.path.expanduser("~/") - filename, reader, _ = open_filename_dialog(start_file, None, self.available_readers) + filename, reader, _ = open_filename_dialog( + start_file, None, self.available_readers, + add_all="*") if not filename: return self.add_path(filename) @@ -415,20 +421,20 @@ def _try_load(self): if not url: return self.Information.no_file_selected - def mark_problematic_reader(): - self.reader_combo.setItemData(self.reader_combo.currentIndex(), - QBrush(Qt.red), Qt.ForegroundRole) - try: self.reader = self._get_reader() # also sets current reader index assert self.reader is not None except MissingReaderException: - mark_problematic_reader() - return self.Error.missing_reader + if self.reader_combo.currentIndex() > 0: + return self.Error.missing_reader + else: + return self.Error.select_file_type except Exception as ex: - mark_problematic_reader() log.exception(ex) - return lambda x=ex: self.Error.unknown(str(x)) + if self.reader_combo.currentIndex() > 0: + return lambda x=ex: self.Error.unknown(str(x)) + else: + return lambda x=ex: self.Error.unknown_select(str(x)) try: self._update_sheet_combo() @@ -439,7 +445,6 @@ def mark_problematic_reader(): try: data = self.reader.read() except Exception as ex: - mark_problematic_reader() log.exception(ex) return lambda x=ex: self.Error.unknown(str(x)) if warnings: @@ -455,9 +460,25 @@ def mark_problematic_reader(): return None def _get_reader(self) -> FileFormat: + """ + Get the reader for the current file. + + For local files, this also observes the stored settings and the reader + combo, as follows: + + 1. If the file format is known (from stored settings), use it and set + the reader combo to the corresponding index (as in settings) + 2. 
Otherwise, detect it from the extension and set the combo to + Auto detect, overriding any previous user-set choice + 3. Otherwise, use the current combo state. + + Returns: + FileFormat: reader instance + """ + if self.source == self.LOCAL_FILE: path = self.last_path() self.reader_combo.setEnabled(True) + if self.recent_paths and self.recent_paths[0].file_format: qname = self.recent_paths[0].file_format qname_index = {r.qualified_name(): i for i, r in enumerate(self.available_readers)} @@ -471,11 +492,22 @@ def _get_reader(self) -> FileFormat: try: reader_class = class_from_qualified_name(qname) except Exception as ex: - raise MissingReaderException(f'Can not find reader "{qname}"') from ex + raise MissingReaderException(f'Can not find reader "{qname}"') from ex reader = reader_class(path) + else: - self.reader_combo.setCurrentIndex(0) - reader = FileFormat.get_reader(path) + old_idx = self.reader_combo.currentIndex() + try: + self.reader_combo.setCurrentIndex(0) + reader = FileFormat.get_reader(path) + except MissingReaderException: + if old_idx == 0: + raise + # Store the previously selected file format in the current path, + # and repeat the call to return the corresponding reader + self.select_reader(old_idx) + return self._get_reader() + if self.recent_paths and self.recent_paths[0].sheet: reader.select_sheet(self.recent_paths[0].sheet) return reader @@ -504,12 +536,21 @@ def _select_active_sheet(self): self.sheet_combo.setCurrentIndex(0) def _initialize_reader_combo(self): - self.reader_combo.clear() - filters = [format_filter(f) for f in self.available_readers] - self.reader_combo.addItems([DEFAULT_READER_TEXT] + filters) - self.reader_combo.setCurrentIndex(0) - self.reader_combo.setDisabled(True) - # additional readers may be added in self._get_reader() + # Reset to initial state without losing the current index or + # emitting any signals.
+ combo = self.reader_combo + if not combo.count(): + filters = [format_filter(f) for f in self.available_readers] + combo.addItems([DEFAULT_READER_TEXT] + filters) + combo.setCurrentIndex(0) + else: + # additional readers may be added in self._get_reader() + n = len(self.available_readers) + 1 + if combo.currentIndex() >= n: + combo.setCurrentIndex(0) + while combo.count() > n: + combo.removeItem(combo.count() - 1) + combo.setDisabled(True) @staticmethod def _describe(table): diff --git a/Orange/widgets/data/tests/test_owfile.py b/Orange/widgets/data/tests/test_owfile.py index 27ef58e747b..32e56662c25 100644 --- a/Orange/widgets/data/tests/test_owfile.py +++ b/Orange/widgets/data/tests/test_owfile.py @@ -361,13 +361,13 @@ def test_reader_custom_tab(self): outdata = self.get_output(self.widget.Outputs.data) self.assertEqual(len(outdata), 150) # loaded iris - def test_no_reader_extension(self): + def test_unknown_extension(self): with named_file("", suffix=".xyz_unknown") as fn: no_reader = RecentPath(fn, None, None) self.widget = self.create_widget(OWFile, stored_settings={"recent_paths": [no_reader]}) self.widget.load_data() - self.assertTrue(self.widget.Error.missing_reader.is_shown()) + self.assertTrue(self.widget.Error.select_file_type.is_shown()) def test_fail_sheets(self): with named_file("", suffix=".failed_sheet") as fn: