% Zeng et al. (2007), journal article, vol. 9, no. 1, article e4:
% term identification methods for consumer health vocabulary development.
%
% Maintenance notes:
%   - Exactly one `url` field is kept (the publisher landing page). The
%     export originally repeated `url` three times, which BibTeX reports as
%     a repeated field and resolves by ignoring the extras.
%   - The DOI resolver link is not stored separately; it is derivable from `doi`.
%   - The PubMed record is kept as the bare identifier in `pmid`
%     (http://www.ncbi.nlm.nih.gov/pubmed/17478413).
%   - `month` uses the predefined three-letter macro so the style controls
%     abbreviation and language.
%   - `day`, `keywords`, `abstract`, `issn` and `pmid` are not printed by the
%     standard BibTeX styles; they are retained as metadata.
@article{info:doi/10.2196/jmir.9.1.e4,
  author   = {Zeng, Qing T and Tse, Tony and Divita, Guy and Keselman, Alla and Crowell, Jon and Browne, Allen C and Goryachev, Sergey and Ngo, Long},
  title    = {Term Identification Methods for Consumer Health Vocabulary Development},
  journal  = {Journal of Medical Internet Research},
  year     = {2007},
  month    = mar,
  day      = {14},
  volume   = {9},
  number   = {1},
  pages    = {e4},
  keywords = {Consumer health information; vocabulary; natural language processing},
  abstract = {Background: The development of consumer health information applications such as health education websites has motivated the research on consumer health vocabulary (CHV). Term identification is a critical task in vocabulary development. Because of the heterogeneity and ambiguity of consumer expressions, term identification for CHV is more challenging than for professional health vocabularies. Objective: For the development of a CHV, we explored several term identification methods, including collaborative human review and automated term recognition methods. Methods: A set of criteria was established to ensure consistency in the collaborative review, which analyzed 1893 strings. Using the results from the human review, we tested two automated methods---C-value formula and a logistic regression model. Results: The study identified 753 consumer terms and found the logistic regression model to be highly effective for CHV term identification (area under the receiver operating characteristic curve = 95.5{\%}). Conclusions: The collaborative human review and logistic regression methods were effective for identifying terms for CHV development. },
  issn     = {1438-8871},
  doi      = {10.2196/jmir.9.1.e4},
  url      = {http://www.jmir.org/2007/1/e4/},
  pmid     = {17478413},
}