% Conference paper in the JSAI-isAI 2019 revised selected papers volume (pp. 20--33).
% Names use "Last, First" form; the product name in the title is brace-protected
% so that sentence-casing styles do not lowercase it.
% NOTE(review): the address field holds only a country (translated from the
% original non-ASCII value); the publisher's city is not given in this record --
% confirm and replace if known.
@inproceedings{49bdb3af22b747b3aee198c1f1545560,
  author    = {Lee, Jieh-Sheng and Hsiang, Jieh},
  title     = {{PatentTransformer-1.5}: Measuring Patent Claim Generation by Span Relevancy},
  booktitle = {New Frontiers in Artificial Intelligence - JSAI-isAI International Workshops, JURISIN, AI-Biz, LENLS, Kansei-AI, 2019, Revised Selected Papers},
  editor    = {Sakamoto, Maki and Okazaki, Naoaki and Mineshima, Koji and Satoh, Ken},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  pages     = {20--33},
  year      = {2020},
  month     = sep,
  day       = {11},
  doi       = {10.1007/978-3-030-58790-1_2},
  isbn      = {978-3-030-58789-5},
  language  = {English},
  keywords  = {BERT, Claims, Classification, GPT-2, NLG, NLI, NLP, Patent, Text generation},
  note      = {Publisher Copyright: {\textcopyright} 2020, Springer Nature Switzerland AG.; 11th JSAI International Symposium on Artificial Intelligence, JSAI-isAI 2019 ; Conference date: 10-11-2019 Through 12-11-2019},
  abstract  = {PatentTransformer is our codename for patent text generation based on Transformer-based models. Our long-term goal of patent claim generation is to realize ``augmented inventing'' for inventors by leveraging new Deep Learning techniques. We envision the possibility of building an ``auto-complete'' function for inventors to conceive better inventions in the era of artificial intelligence. In order to generate patent claims with reasonable quality, a fundamental question is how to measure the quality. In PatentTransformer-1.5, we tackle the problem from the perspective of claim span relevancy as a proof of concept. Patent claim language was rarely explored in the NLP field. In this work, we propose a span-based approach and a generic framework to measure patent claim generation quantitatively. In order to study the effectiveness of patent claim generation, we define a metric to measure whether two consecutive spans in a generated patent claims are relevant. We treat such relevancy measurement as a span-pair classification problem, following the concept of natural language inference. Technically, the span-pair classifier is implemented by fine-tuning a pre-trained language model. The patent claim generation is implemented by fine-tuning the other pre-trained model. Specifically, we fine-tune a pre-trained Google BERT model to measure the patent claim spans generated by a fine-tuned OpenAI GPT-2 model. In this way, we re-use two of the state-of-the-art pre-trained models in the NLP field. Our result shows the effectiveness of the span-pair classifier after fine-tuning the pre-trained model. It further validates the quantitative metric of span relevancy in patent claim generation. Particularly, we found that the span relevancy ratio measured by BERT becomes lower when the diversity in GPT-2 text generation becomes higher.},
}