diff --git a/changelog.md b/changelog.md index 1a3267b..314ccbd 100644 --- a/changelog.md +++ b/changelog.md @@ -29,6 +29,8 @@ ### Fixed bugs +- [[#149]](https://github.com/pubs/pubs/issues/149) More robust handling of parsing and citekey errors [(#87)](https://github.com/pubs/pubs/pull/87) + - [[#148]](https://github.com/pubs/pubs/issues/148) Fix compatibility with Pyfakefs 3.7 [(#151)](https://github.com/pubs/pubs/pull/151) - [[#95]](https://github.com/pubs/pubs/issues/95) Error message when editor is missing [(#141)](https://github.com/pubs/pubs/issues/141) diff --git a/pubs/commands/add_cmd.py b/pubs/commands/add_cmd.py index a22c0a5..d193b5e 100644 --- a/pubs/commands/add_cmd.py +++ b/pubs/commands/add_cmd.py @@ -11,6 +11,7 @@ from .. import templates from .. import apis from .. import pretty from .. import utils +from .. import endecoder from ..completion import CommaSeparatedTagsCompletion @@ -57,12 +58,13 @@ def bibentry_from_editor(conf, ui, rp): bibstruct.verify_bibdata(bibentry) # REFACTOR Generate citykey again = False - except ValueError: + + except endecoder.EnDecoder.BibDecodingError: again = ui.input_yn( - question='Invalid bibfile. Edit again ?', + question='Invalid bibfile. Edit again?', default='y') if not again: - ui.exit(0) + ui.exit() return bibentry diff --git a/pubs/commands/edit_cmd.py b/pubs/commands/edit_cmd.py index b09c885..8cc175b 100644 --- a/pubs/commands/edit_cmd.py +++ b/pubs/commands/edit_cmd.py @@ -64,9 +64,14 @@ def command(conf, args): 'as `{}`.'.format(citekey, new_paper.citekey))) else: ui.info(('Paper `{}` was successfully edited.'.format( - citekey))) + citekey))) break + except coder.BibDecodingError: + if not ui.input_yn(question="Error parsing bibdata. Edit again?"): + ui.error("Aborting, paper not updated.") + ui.exit() + except repo.CiteKeyCollision: options = ['overwrite', 'edit again', 'abort'] choice = options[ui.input_choice( diff --git a/pubs/commands/import_cmd.py b/pubs/commands/import_cmd.py index 9ccb935..fa6236e 100644 --- a/pubs/commands/import_cmd.py +++ b/pubs/commands/import_cmd.py @@ -13,6 +13,10 @@ from ..uis import get_ui from ..content import system_path, read_text_file +_ABORT_USE_IGNORE_MSG = "Aborting import. Use --ignore-malformed to ignore." +_IGNORING_MSG = " Ignoring it." + + def parser(subparsers, conf): parser = subparsers.add_parser('import', help='import paper(s) to the repository') @@ -24,10 +28,12 @@ def parser(subparsers, conf): help="one or several keys to import from the file") parser.add_argument('-O', '--overwrite', action='store_true', default=False, help="Overwrite keys already in the database") + parser.add_argument('-i', '--ignore-malformed', action='store_true', default=False, + help="Ignore malformed and unreadable files and entries") return parser -def many_from_path(bibpath): +def many_from_path(ui, bibpath, ignore=False): """Extract list of papers found in bibliographic files in path. The behavior is to: @@ -49,16 +55,31 @@ def many_from_path(bibpath): biblist = [] for filepath in all_files: - biblist.append(coder.decode_bibdata(read_text_file(filepath))) + try: + biblist.append(coder.decode_bibdata(read_text_file(filepath))) + except coder.BibDecodingError: + error = "Could not parse bibtex at {}.".format(filepath) + if ignore: + ui.warning(error + _IGNORING_MSG) + else: + ui.error(error + _ABORT_USE_IGNORE_MSG) + ui.exit() papers = {} for b in biblist: for k, b in b.items(): + if k in papers: + ui.warning('Duplicated citekey {}. Keeping the last one.'.format(k)) try: papers[k] = Paper(k, b) papers[k].added = datetime.datetime.now() except ValueError as e: - papers[k] = e + error = 'Could not load entry for citekey {} ({}).'.format(k, e) + if ignore: + ui.warning(error + _IGNORING_MSG) + else: + ui.error(error + _ABORT_USE_IGNORE_MSG) + ui.exit() return papers @@ -75,20 +96,17 @@ def command(conf, args): rp = repo.Repository(conf) # Extract papers from bib - papers = many_from_path(bibpath) + papers = many_from_path(ui, bibpath, ignore=args.ignore_malformed) keys = args.keys or papers.keys() for k in keys: p = papers[k] - if isinstance(p, Exception): - ui.error('Could not load entry for citekey {}.'.format(k)) + rp.push_paper(p, overwrite=args.overwrite) + ui.info('{} imported.'.format(color.dye_out(p.citekey, 'citekey'))) + docfile = bibstruct.extract_docfile(p.bibdata) + if docfile is None: + ui.warning("No file for {}.".format(p.citekey)) else: - rp.push_paper(p, overwrite=args.overwrite) - ui.info('{} imported.'.format(color.dye_out(p.citekey, 'citekey'))) - docfile = bibstruct.extract_docfile(p.bibdata) - if docfile is None: - ui.warning("No file for {}.".format(p.citekey)) - else: - rp.push_doc(p.citekey, docfile, copy=copy) - #FIXME should move the file if configured to do so. + rp.push_doc(p.citekey, docfile, copy=copy) + # FIXME should move the file if configured to do so. rp.close() diff --git a/pubs/databroker.py b/pubs/databroker.py index d529c3e..97c1398 100644 --- a/pubs/databroker.py +++ b/pubs/databroker.py @@ -45,7 +45,11 @@ class DataBroker(object): def pull_bibentry(self, citekey): bibdata_raw = self.filebroker.pull_bibfile(citekey) - return self.endecoder.decode_bibdata(bibdata_raw) + try: + return self.endecoder.decode_bibdata(bibdata_raw) + except self.endecoder.BibDecodingError as e: + e.message = "Unable to decode bibtex for paper {}.".format(citekey) + raise e def push_metadata(self, citekey, metadata): metadata_raw = self.endecoder.encode_metadata(metadata) diff --git a/pubs/endecoder.py b/pubs/endecoder.py index 8fd6941..259b8d9 100644 --- a/pubs/endecoder.py +++ b/pubs/endecoder.py @@ -66,6 +66,16 @@ class EnDecoder(object): * encode_bibdata will try to recognize exceptions """ + class BibDecodingError(Exception): + + message = "Could not parse provided bibdata:\n---\n{}\n---" + + def __init__(self, bibdata): + self.data = bibdata + + def __str__(self): + return self.message.format(self.data) + bwriter = bp.bwriter.BibTexWriter() bwriter.display_order = BIBFIELD_ORDER @@ -103,7 +113,10 @@ class EnDecoder(object): return entry def decode_bibdata(self, bibdata): - """""" + """Decodes bibdata from string. + + If the decoding fails, returns a BibParseError. + """ try: entries = bp.bparser.BibTexParser( bibdata, common_strings=True, @@ -121,4 +134,5 @@ class EnDecoder(object): except Exception: import traceback traceback.print_exc() - raise ValueError('could not parse provided bibdata:\n{}'.format(bibdata)) + raise self.BibDecodingError(bibdata) + # TODO: filter exceptions from pyparsing and pass reason upstream diff --git a/pubs/uis.py b/pubs/uis.py index afd90fe..9a39b32 100644 --- a/pubs/uis.py +++ b/pubs/uis.py @@ -119,7 +119,7 @@ class InputUI(PrintUI): except EOFError: self.error('Standard input ended while waiting for answer.') self.exit(1) - return ustr(data) #.decode('utf-8') + return ustr(data) #.decode('utf-8') def input_choice_ng(self, options, option_chars=None, default=None, question=''): """Ask the user to chose between a set of options. The user is asked diff --git a/tests/fake_env.py b/tests/fake_env.py index 1a1fe6c..d099a79 100644 --- a/tests/fake_env.py +++ b/tests/fake_env.py @@ -77,6 +77,16 @@ class FakeInput(): if md.__name__ == 'pubs.uis': md.InputUI.editor_input = self md.InputUI.edit_file = self.input_to_file + # Do not catch UnexpectedInput + original_handler = md.InputUI.handle_exception + + def handler(ui, exc): + if isinstance(exc, self.UnexpectedInput): + raise + else: + original_handler(ui, exc) + + md.InputUI.handle_exception = handler def input_to_file(self, path_to_file, temporary=True): content.write_file(path_to_file, self()) diff --git a/tests/str_fixtures.py b/tests/str_fixtures.py index 03360b8..3b6ded3 100644 --- a/tests/str_fixtures.py +++ b/tests/str_fixtures.py @@ -69,7 +69,7 @@ bibtex_no_citekey = """@Manual{, } """ -bibtex_month= """@inproceedings{Goyal2017, +bibtex_month = """@inproceedings{Goyal2017, author = {Goyal, Anirudh and Sordoni, Alessandro and C{\^{o}}t{\'{e}}, Marc-Alexandre and Ke, Nan Rosemary and Bengio, Yoshua}, title = {Z-Forcing: Training Stochastic Recurrent Networks}, year = {2017}, @@ -78,6 +78,12 @@ bibtex_month= """@inproceedings{Goyal2017, } """ +not_bibtex = """@misc{this looks, + like = a = bibtex file but + , is not a real one! + +""" + sample_conf = """ [main] diff --git a/tests/test_apis.py b/tests/test_apis.py index 087f32f..a83481a 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -32,7 +32,7 @@ class TestDOI2Bibtex(unittest.TestCase): def test_parse_fails_on_incorrect_DOI(self): bib = doi2bibtex('999999') - with self.assertRaises(ValueError): + with self.assertRaises(EnDecoder.BibDecodingError): self.endecoder.decode_bibdata(bib) @@ -56,7 +56,7 @@ class TestISBN2Bibtex(unittest.TestCase): def test_parse_fails_on_incorrect_ISBN(self): bib = doi2bibtex('9' * 13) - with self.assertRaises(ValueError): + with self.assertRaises(EnDecoder.BibDecodingError): self.endecoder.decode_bibdata(bib) diff --git a/tests/test_endecoder.py b/tests/test_endecoder.py index 80a9f53..a6e3043 100644 --- a/tests/test_endecoder.py +++ b/tests/test_endecoder.py @@ -52,7 +52,7 @@ class TestEnDecode(unittest.TestCase): self.assertEqual(bibraw1, bibraw2) - def test_endecode_bibtex(self): + def test_endecode_bibtex_converts_month_string(self): """Test if `month=dec` is correctly recognized and transformed into `month={December}`""" decoder = endecoder.EnDecoder() @@ -147,6 +147,11 @@ class TestEnDecode(unittest.TestCase): self.assertIn('author', entry1) self.assertIn('institution', entry1) + def test_endecodes_raises_exception(self): + decoder = endecoder.EnDecoder() + with self.assertRaises(decoder.BibDecodingError): + decoder.decode_bibdata("@misc{I am not a correct bibtex{{}") + if __name__ == '__main__': unittest.main() diff --git a/tests/test_usecase.py b/tests/test_usecase.py index ae98258..43cbf2c 100644 --- a/tests/test_usecase.py +++ b/tests/test_usecase.py @@ -128,7 +128,7 @@ class CommandTestCase(fake_env.TestFakeFs): outs.append(color.undye(actual_out)) else: pubs_cmd.execute(actual_cmd.split()) - except fake_env.FakeInput.UnexpectedInput as e: + except fake_env.FakeInput.UnexpectedInput: self.fail('Unexpected input asked by command: {}.'.format( actual_cmd)) return outs @@ -197,7 +197,7 @@ class TestAlone(CommandTestCase): def test_alone_misses_command(self): with self.assertRaises(FakeSystemExit) as cm: self.execute_cmds(['pubs']) - self.assertEqual(cm.exception.code, 2) + self.assertEqual(cm.exception.code, 2) def test_alone_prints_help(self): # capturing the output of `pubs --help` is difficult because argparse @@ -334,11 +334,20 @@ class TestAdd(URLContentTestCase): def test_add_no_citekey_fails(self): # See #113 cmds = ['pubs init', - ('pubs add', [str_fixtures.bibtex_no_citekey]), + ('pubs add', [str_fixtures.bibtex_no_citekey, 'n']), ] with self.assertRaises(FakeSystemExit): self.execute_cmds(cmds) + def test_add_edit_fails(self): + cmds = ['pubs init', + ('pubs add', + ['@misc{I am not a correct bibtex{{}', 'n']), + ] + with self.assertRaises(FakeSystemExit) as cm: + self.execute_cmds(cmds) + self.assertEqual(cm.exception.code, 1) + class TestList(DataCommandTestCase): @@ -679,16 +688,40 @@ class TestUsecase(DataCommandTestCase): self.assertFileContentEqual(os.path.expanduser('~/.pubsrc'), str_fixtures.sample_conf) - def test_editor_abort(self): + def test_editor_aborts(self): with self.assertRaises(FakeSystemExit): cmds = ['pubs init', - ('pubs add', ['abc', 'n']), - ('pubs add', ['abc', 'y', 'abc', 'n']), 'pubs add data/pagerank.bib', - ('pubs edit Page99', ['', 'a']), + ('pubs edit Page99', ['', 'n']), ] self.execute_cmds(cmds) + def test_editor_succeeds_on_second_edit(self): + cmds = ['pubs init', + 'pubs add data/pagerank.bib', + ('pubs edit Page99', [ + '@misc{Page99, title="TTT" author="X. YY"}', 'y', + '@misc{Page99, title="TTT", author="X. YY"}', '']), + ('pubs list', [], '[Page99] YY, X. "TTT" \n') + ] + self.execute_cmds(cmds) + + def test_add_aborts(self): + with self.assertRaises(FakeSystemExit): + cmds = ['pubs init', + ('pubs add New', ['']), + ] + self.execute_cmds(cmds) + + def test_add_succeeds_on_second_edit(self): + cmds = ['pubs init', + ('pubs add', [ + '', 'y', + '@misc{New, title="TTT", author="X. YY"}', '']), + ('pubs list', [], '[New] YY, X. "TTT" \n') + ] + self.execute_cmds(cmds) + def test_editor_success(self): cmds = ['pubs init', ('pubs add', [str_fixtures.bibtex_external0]), @@ -790,6 +823,25 @@ class TestUsecase(DataCommandTestCase): outs = self.execute_cmds(cmds) self.assertEqual(1 + 1, len(outs[-1].split('\n'))) + def test_import_fails_without_ignore(self): + with FakeFileOpen(self.fs)('data/fake.bib', 'w') as f: + f.write(str_fixtures.not_bibtex) + cmds = ['pubs init', + 'pubs import data/ Page99', + ] + with self.assertRaises(FakeSystemExit): + self.execute_cmds(cmds) + + def test_import_ignores(self): + with FakeFileOpen(self.fs)('data/fake.bib', 'w') as f: + f.write(str_fixtures.not_bibtex) + cmds = ['pubs init', + 'pubs import --ignore-malformed data/ Page99', + 'pubs list' + ] + outs = self.execute_cmds(cmds) + self.assertEqual(1 + 1, len(outs[-1].split('\n'))) + def test_update(self): cmds = ['pubs init', 'pubs add data/pagerank.bib',