% Conference paper in the ICHI 2020 proceedings.
% The citation key is kept unchanged so existing \cite commands keep resolving.
% No series field: the proceedings title belongs in booktitle only; repeating it
% in series makes standard styles print the conference name twice.
@inproceedings{50a9d52c37f34f2a9b01e6464a940737,
  author    = {Rawal, Samarth and Prakash, Ashok and Adhya, Soumya and Kulkarni, Sidharth and Anwar, Saadat and Baral, Chitta and Devarakonda, Murthy},
  title     = {Semi-Automated Clinical Lexicon Induction and Its Use in Cohort Selection from Clinical Notes},
  booktitle = {2020 {IEEE} International Conference on Healthcare Informatics, {ICHI} 2020},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2020},
  month     = nov,
  doi       = {10.1109/ICHI48887.2020.9374374},
  keywords  = {clinical text, cohort selection, hybrid methods, lexicon induction, semi-automated},
  abstract  = {Special purpose lexicons are invaluable in biomedical natural language processing. They are especially crucial for a task such as the 13-criteria based cohort identification from clinical notes, process in N2C2 2018 Track 1 Challenge. While manually developed lexicons helped us achieve high performance, the process was ad hoc and nonreproducible. This paper presents a semi-automated lexicon induction method, using Logistic Regression (LR) and word embeddings, which brings rigor to the process. The key idea was to use n-grams in the training corpus as features of LR and identify those features (n-grams) with the most impact on the outcome as the lexicon. The semi-automatically generated lexicons achieved overall F measure of 0.9166 versus 0.9003 with manually generated lexicons. Therefore, this study shows that lexicons generated using a rigorous, semi-automated approach can retain performance while bringing rigor to the process.},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2020 IEEE.; 8th IEEE International Conference on Healthcare Informatics, ICHI 2020 ; Conference date: 30-11-2020 Through 03-12-2020},
}