% Journal article record for JMIR 2020;22(7):e17758.
% Text outside an entry is ignored by BibTeX, so these notes are safe here.
%  - The citation key is kept verbatim so existing \cite commands still resolve.
%  - Only one url field is kept (the publisher landing page): repeated fields
%    are dropped or rejected depending on the processor. The DOI link is
%    already carried by the doi field, and the PubMed identifier is stored in
%    pmid (ignored by styles that do not know that field).
%  - month uses the predefined jul macro so styles can format/localize it.
%  - pages holds the electronic article number, not a page range.
@Article{info:doi/10.2196/17758,
  author   = {Ram{\'i}rez-Cifuentes, Diana
              and Freire, Ana
              and Baeza-Yates, Ricardo
              and Punt{\'i}, Joaquim
              and Medina-Bravo, Pilar
              and Velazquez, Diego Alejandro
              and Gonfaus, Josep Maria
              and Gonz{\`a}lez, Jordi},
  title    = {Detection of Suicidal Ideation on Social Media: Multimodal, Relational, and Behavioral Analysis},
  journal  = {Journal of Medical Internet Research},
  year     = {2020},
  month    = jul,
  day      = {7},
  volume   = {22},
  number   = {7},
  pages    = {e17758},
  keywords = {social media; mental health; suicidal ideation; risk assessment; machine learning},
  abstract = {Background: Suicide risk assessment usually involves an interaction between doctors and patients. However, a significant number of people with mental disorders receive no treatment for their condition due to the limited access to mental health care facilities; the reduced availability of clinicians; the lack of awareness; and stigma, neglect, and discrimination surrounding mental disorders. In contrast, internet access and social media usage have increased significantly, providing experts and patients with a means of communication that may contribute to the development of methods to detect mental health issues among social media users. Objective: This paper aimed to describe an approach for the suicide risk assessment of Spanish-speaking users on social media. We aimed to explore behavioral, relational, and multimodal data extracted from multiple social platforms and develop machine learning models to detect users at risk. Methods: We characterized users based on their writings, posting patterns, relations with other users, and images posted. We also evaluated statistical and deep learning approaches to handle multimodal data for the detection of users with signs of suicidal ideation (suicidal ideation risk group). Our methods were evaluated over a dataset of 252 users annotated by clinicians. To evaluate the performance of our models, we distinguished 2 control groups: users who make use of suicide-related vocabulary (focused control group) and generic random users (generic control group). Results: We identified significant statistical differences between the textual and behavioral attributes of each of the control groups compared with the suicidal ideation risk group. At a 95{\%} CI, when comparing the suicidal ideation risk group and the focused control group, the number of friends (P=.04) and median tweet length (P=.04) were significantly different. The median number of friends for a focused control user (median 578.5) was higher than that for a user at risk (median 372.0). Similarly, the median tweet length was higher for focused control users, with 16 words against 13 words of suicidal ideation risk users. Our findings also show that the combination of textual, visual, relational, and behavioral data outperforms the accuracy of using each modality separately. We defined text-based baseline models based on bag of words and word embeddings, which were outperformed by our models, obtaining an increase in accuracy of up to 8{\%} when distinguishing users at risk from both types of control users. Conclusions: The types of attributes analyzed are significant for detecting users at risk, and their combination outperforms the results provided by generic, exclusively text-based baseline models. After evaluating the contribution of image-based predictive models, we believe that our results can be improved by enhancing the models based on textual and relational features. These methods can be extended and applied to different use cases related to other mental disorders.},
  issn     = {1438-8871},
  doi      = {10.2196/17758},
  url      = {https://www.jmir.org/2020/7/e17758},
  pmid     = {32673256},
}