Publications

2024
Lunny, Carole; Whitelaw, Sera; Reid, Emma K; Chi, Yuan; Ferri, Nicola; Zhang, Jia He (Janet); Pieper, Dawid; Kanji, Salmaan; Veroniki, Areti-Angeliki; Shea, Beverley; Dourka, Jasmeen; Ardern, Clare; Pham, Ba; Bagheri, Ebrahim; Tricco, Andrea C
Exploring decision-makers’ challenges and strategies when selecting multiple systematic reviews: insights for AI decision support tools in healthcare Journal Article
In: BMJ Open, vol. 14, pp. e084124, 2024.
Abstract | Links | BibTeX | Tags:
@article{BMJOpen1,
  author    = {Carole Lunny and Sera Whitelaw and Emma K Reid and Yuan Chi and Nicola Ferri and Jia He (Janet) Zhang and Dawid Pieper and Salmaan Kanji and Areti-Angeliki Veroniki and Beverley Shea and Jasmeen Dourka and Clare Ardern and Ba Pham and Ebrahim Bagheri and Andrea C Tricco},
  title     = {Exploring decision-makers’ challenges and strategies when selecting multiple systematic reviews: insights for {AI} decision support tools in healthcare},
  journal   = {BMJ Open},
  volume    = {14},
  pages     = {e084124},
  year      = {2024},
  date      = {2024-07-07},
  doi       = {10.1136/bmjopen-2024-084124},
  urldate   = {2024-07-07},
  abstract  = {Background Systematic reviews (SRs) are being published at an accelerated rate. Decision-makers may struggle with comparing and choosing between multiple SRs on the same topic. We aimed to understand how healthcare decision-makers (eg, practitioners, policymakers, researchers) use SRs to inform decision-making and to explore the potential role of a proposed artificial intelligence (AI) tool to assist in critical appraisal and choosing among SRs.

Methods We developed a survey with 21 open and closed questions. We followed a knowledge translation plan to disseminate the survey through social media and professional networks.

Results Our survey response rate was lower than expected (7.9% of distributed emails). Of the 684 respondents, 58.2% identified as researchers, 37.1% as practitioners, 19.2% as students and 13.5% as policymakers. Respondents frequently sought out SRs (97.1%) as a source of evidence to inform decision-making. They frequently (97.9%) found more than one SR on a given topic of interest to them. Just over half (50.8%) struggled to choose the most trustworthy SR among multiple. These difficulties related to lack of time (55.2%), or difficulties comparing due to varying methodological quality of SRs (54.2%), differences in results and conclusions (49.7%) or variation in the included studies (44.6%). Respondents compared SRs based on the relevance to their question of interest, methodological quality, and recency of the SR search. Most respondents (87.0%) were interested in an AI tool to help appraise and compare SRs.

Conclusions Given the identified barriers of using SR evidence, an AI tool to facilitate comparison of the relevance of SRs, the search and methodological quality, could help users efficiently choose among SRs and make healthcare decisions.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Background Systematic reviews (SRs) are being published at an accelerated rate. Decision-makers may struggle with comparing and choosing between multiple SRs on the same topic. We aimed to understand how healthcare decision-makers (eg, practitioners, policymakers, researchers) use SRs to inform decision-making and to explore the potential role of a proposed artificial intelligence (AI) tool to assist in critical appraisal and choosing among SRs.

Methods We developed a survey with 21 open and closed questions. We followed a knowledge translation plan to disseminate the survey through social media and professional networks.

Results Our survey response rate was lower than expected (7.9% of distributed emails). Of the 684 respondents, 58.2% identified as researchers, 37.1% as practitioners, 19.2% as students and 13.5% as policymakers. Respondents frequently sought out SRs (97.1%) as a source of evidence to inform decision-making. They frequently (97.9%) found more than one SR on a given topic of interest to them. Just over half (50.8%) struggled to choose the most trustworthy SR among multiple. These difficulties related to lack of time (55.2%), or difficulties comparing due to varying methodological quality of SRs (54.2%), differences in results and conclusions (49.7%) or variation in the included studies (44.6%). Respondents compared SRs based on the relevance to their question of interest, methodological quality, and recency of the SR search. Most respondents (87.0%) were interested in an AI tool to help appraise and compare SRs.

Conclusions Given the identified barriers of using SR evidence, an AI tool to facilitate comparison of the relevance of SRs, the search and methodological quality, could help users efficiently choose among SRs and make healthcare decisions.
Close
doi:10.1136/bmjopen-2024-084124
Close
Zarrinkalam, Fattane; Noughabi, Havva Alizadeh; Noorian, Zeinab; Fani, Hossein; Bagheri, Ebrahim
Predicting Users’ Future Interests on Social Networks: A Reference Framework Journal Article
In: Information Processing and Management, 2024.
Abstract | Links | BibTeX | Tags:
@article{IPM2024ab,
  author    = {Fattane Zarrinkalam and Havva Alizadeh Noughabi and Zeinab Noorian and Hossein Fani and Ebrahim Bagheri},
  title     = {Predicting Users’ Future Interests on Social Networks: A Reference Framework},
  journal   = {Information Processing and Management},
  year      = {2024},
  date      = {2024-05-01},
  url       = {https://ls3.rnet.torontomu.ca/predicting_users__future_interests_on_social_networks__a_reference_framework-3-4/},
  urldate   = {2024-05-01},
  abstract  = {Predicting users’ interests on social networks is gaining attention due to its potential to cater customized information and services to the end users. Although previous works have extensively explored how users’ interests can be modeled on social networks, there has been limited investigation into the prediction of users’ future interests. The objective of our work in this paper is to empirically study the effectiveness of different sets of features based on users’ past social interactions, historical interests and their temporal dynamics to predict their interests over a collection of future-yet-unobserved topics. More specifically, we introduce and formalize the features for interest prediction in four categories: user-based, topical, explicit user-topic engagement, and friends’ influence. We further explore the influence of temporality by augmenting features with information pertaining to users’ historical interests and social connections. We model the task of future interest prediction as a learning-to-rank problem where different features and their related categories are ranked based on their relevance and performance in interest prediction, and investigate the efficiency of different features individually and comparatively for predicting the future interest of users with different activity levels in social networks over on unobserved topics. After conducting experiments on a real-world dataset sourced from Twitter, we have identified several noteworthy findings: 1) relevance feature in the category of past explicit user-topic engagement is the strongest indicator for predicting user’s future interest across all user groups, with an observed 8.57% decrease in NDCG and an 8.95% decrease in MAP when it is removed in the ablation study.
2) the observation of an 8.06% decrease in NDCG and a 7.3% decrease in MAP, when topical features such as popularity, freshness, and coherence are removed in the ablation study, highlights their significance as among the strongest indicators for users’ future interest, particularly for low-activity users. 3) although temporal features show a clear positive impact across user groups with varying levels of activity (resulting in a 4.5% decrease in NDCG and a 7.3% decrease in MAP when removed in the ablation study), the temporal topical features do not demonstrate a significant positive effect, and 4) The removal of user-specific characteristics such as influence and personality traits in the ablation study reveals their significant impact in predicting future interest over cold topics, reflected by a 5.49% decrease in NDCG and a 5.72% decrease in MAP. Our findings make significant contributions to the field of future interest prediction, offering valuable insights and practical implications for various applications in social network analysis.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Predicting users’ interests on social networks is gaining attention due to its potential to cater customized information and services to the end users. Although previous works have extensively explored how users’ interests can be modeled on social networks, there has been limited investigation into the prediction of users’ future interests. The objective of our work in this paper is to empirically study the effectiveness of different sets of features based on users’ past social interactions, historical interests and their temporal dynamics to predict their interests over a collection of future-yet-unobserved topics. More specifically, we introduce and formalize the features for interest prediction in four categories: user-based, topical, explicit user-topic engagement, and friends’ influence. We further explore the influence of temporality by augmenting features with information pertaining to users’ historical interests and social connections. We model the task of future interest prediction as a learning-to-rank problem where different features and their related categories are ranked based on their relevance and performance in interest prediction, and investigate the efficiency of different features individually and comparatively for predicting the future interest of users with different activity levels in social networks over on unobserved topics. After conducting experiments on a real-world dataset sourced from Twitter, we have identified several noteworthy findings: 1) relevance feature in the category of past explicit user-topic engagement is the strongest indicator for predicting user’s future interest across all user groups, with an observed 8.57% decrease in NDCG and an 8.95% decrease in MAP when it is removed in the ablation study. 
2) the observation of an 8.06% decrease in NDCG and a 7.3% decrease in MAP, when topical features such as popularity, freshness, and coherence are removed in the ablation study, highlights their significance as among the strongest indicators for users’ future interest, particularly for low-activity users. 3) although temporal features show a clear positive impact across user groups with varying levels of activity (resulting in a 4.5% decrease in NDCG and a 7.3% decrease in MAP when removed in the ablation study), the temporal topical features do not demonstrate a significant positive effect, and 4) The removal of user-specific characteristics such as influence and personality traits in the ablation study reveals their significant impact in predicting future interest over cold topics, reflected by a 5.49% decrease in NDCG and a 5.72% decrease in MAP. Our findings make significant contributions to the field of future interest prediction, offering valuable insights and practical implications for various applications in social network analysis.
Close
https://ls3.rnet.torontomu.ca/predicting_users__future_interests_on_social_netwo[...]
Close
Saleminezhad, Abbas; Arabzadeh, Negar; Bagheri, Ebrahim; Beheshti, Soosan
Context-Aware Query Term Difficulty Estimation for Performance Prediction Proceedings Article
In: The 46th European Conference on Information Retrieval (ECIR 2024), 2024.
Abstract | Links | BibTeX | Tags:
@inproceedings{ecir2024b,
  author    = {Abbas Saleminezhad and Negar Arabzadeh and Ebrahim Bagheri and Soosan Beheshti},
  title     = {Context-Aware Query Term Difficulty Estimation for Performance Prediction},
  booktitle = {The 46th European Conference on Information Retrieval (ECIR 2024)},
  year      = {2024},
  date      = {2024-01-02},
  url       = {https://ls3.rnet.torontomu.ca/ecir_2024__qpp_saleminezhad/},
  urldate   = {2024-01-02},
  abstract  = {Research has already found that many retrieval methods are sensitive to the choice and order of terms that appear in a query, which can significantly impact retrieval effectiveness. We capitalize on this finding in order to predict the performance of a query. More specifically, we propose to learn query term difficulty weights specifically within the context of each query, which could then be used as indicators of whether each query term has the likelihood of making the query more effective or not. We show how such difficulty weights can be learnt through the finetuning of a language model. In addition, we propose an approach to integrate the learnt weights into a cross-encoder architecture to predict query performance. We show that our proposed approach shows a consistently strong performance prediction on the MSMARCO collection and its associated widely used Trec Deep Learning tracks query sets.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Research has already found that many retrieval methods are sensitive to the choice and order of terms that appear in a query, which can significantly impact retrieval effectiveness. We capitalize on this finding in order to predict the performance of a query. More specifically, we propose to learn query term difficulty weights specifically within the context of each query, which could then be used as indicators of whether each query term has the likelihood of making the query more effective or not. We show how such difficulty weights can be learnt through the finetuning of a language model. In addition, we propose an approach to integrate the learnt weights into a cross-encoder architecture to predict query performance. We show that our proposed approach shows a consistently strong performance prediction on the MSMARCO collection and its associated widely used Trec Deep Learning tracks query sets.
Close
https://ls3.rnet.torontomu.ca/ecir_2024__qpp_saleminezhad/
Close
Hosseini, Mohammad; Arabzadeh, Negar; Zihayat, Morteza; Bagheri, Ebrahim
Enhanced Retrieval Effectiveness through Selective Query Generation Proceedings Article
In: 33rd ACM International Conference on Information and Knowledge Management (CIKM 2024), 2024.
Abstract | BibTeX | Tags:
@inproceedings{CIKM2024-2,
  author    = {Mohammad Hosseini and Negar Arabzadeh and Morteza Zihayat and Ebrahim Bagheri},
  title     = {Enhanced Retrieval Effectiveness through Selective Query Generation},
  booktitle = {33rd ACM International Conference on Information and Knowledge Management (CIKM 2024)},
  year      = {2024},
  date      = {2024-07-17},
  abstract  = {Prior research has demonstrated that reformulation of queries can significantly enhance retrieval effectiveness. With advancements in neural architectures, the task of query reformulation has evolved into a supervised translation problem, aimed at rewriting queries into more effective alternatives. Despite notable successes, identifying optimal reformulations that cover the same information need while enhancing retrieval effectiveness is still challenging. This paper introduces a two-step query reformulation framework for generating and selecting optimal target query variants which not only achieve higher retrieval performance but also preserve the original query's information need. Our comprehensive evaluations on the MS MARCO dataset and TREC Deep Learning tracks demonstrate substantial improvements over original query's performance.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Prior research has demonstrated that reformulation of queries can significantly enhance retrieval effectiveness. With advancements in neural architectures, the task of query reformulation has evolved into a supervised translation problem, aimed at rewriting queries into more effective alternatives. Despite notable successes, identifying optimal reformulations that cover the same information need while enhancing retrieval effectiveness is still challenging. This paper introduces a two-step query reformulation framework for generating and selecting optimal target query variants which not only achieve higher retrieval performance but also preserve the original query's information need. Our comprehensive evaluations on the MS MARCO dataset and TREC Deep Learning tracks demonstrate substantial improvements over original query's performance.
Close
Ebrahimi, Sajad; Khodabakhsh, Maryam; Arabzadeh, Negar; Bagheri, Ebrahim
Estimating Query Performance Through Rich Contextualized Query Representations Proceedings Article
In: The 46th European Conference on Information Retrieval (ECIR 2024), 2024.
Abstract | Links | BibTeX | Tags:
@inproceedings{ecir2024c,
  author    = {Sajad Ebrahimi and Maryam Khodabakhsh and Negar Arabzadeh and Ebrahim Bagheri},
  title     = {Estimating Query Performance Through Rich Contextualized Query Representations},
  booktitle = {The 46th European Conference on Information Retrieval (ECIR 2024)},
  year      = {2024},
  date      = {2024-01-01},
  url       = {https://ls3.rnet.torontomu.ca/ecir2024__qpp_ebrahimi/},
  urldate   = {2024-01-01},
  abstract  = {The state-of-the-art query performance prediction methods rely on the fine-tuning of contextual language models to estimate retrieval effectiveness on a per-query basis. Our work in this paper builds on this strong foundation and proposes to learn rich query representations by learning the interactions between the query and two important contextual information, namely (1) the set of documents retrieved by that query, and (2) the set of similar historical queries with known retrieval effectiveness. We propose that such contextualized query representations can be more accurate estimators of query performance as they embed the performance of past similar queries and the semantics of the documents retrieved by the query. We perform extensive experiments on the MSMARCO collection and its accompanying query sets including MSMARCO Dev set and TREC Deep Learning tracks of 2019, 2020, 2021, and DL-Hard. Our experiments reveal that our proposed method shows robust and effective performance compared to state-of-the-art baselines.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
The state-of-the-art query performance prediction methods rely on the fine-tuning of contextual language models to estimate retrieval effectiveness on a per-query basis. Our work in this paper builds on this strong foundation and proposes to learn rich query representations by learning the interactions between the query and two important contextual information, namely (1) the set of documents retrieved by that query, and (2) the set of similar historical queries with known retrieval effectiveness. We propose that such contextualized query representations can be more accurate estimators of query performance as they embed the performance of past similar queries and the semantics of the documents retrieved by the query. We perform extensive experiments on the MSMARCO collection and its accompanying query sets including MSMARCO Dev set and TREC Deep Learning tracks of 2019, 2020, 2021, and DL-Hard. Our experiments reveal that our proposed method shows robust and effective performance compared to state-of-the-art baselines.
Close
https://ls3.rnet.torontomu.ca/ecir2024__qpp_ebrahimi/
Close
Bigdeli, Amin; Arabzadeh, Negar; Bagheri, Ebrahim
LaQuE: Enabling Entity Search at Scale Proceedings Article
In: The 46th European Conference on Information Retrieval (ECIR 2024), 2024.
Abstract | Links | BibTeX | Tags:
@inproceedings{ecir2024d,
  author    = {Amin Bigdeli and Negar Arabzadeh and Ebrahim Bagheri},
  title     = {{LaQuE}: Enabling Entity Search at Scale},
  booktitle = {The 46th European Conference on Information Retrieval (ECIR 2024)},
  year      = {2024},
  date      = {2024-01-01},
  url       = {https://ls3.rnet.torontomu.ca/ecir_2024__laque_arabzadeh/},
  urldate   = {2024-01-01},
  abstract  = {Entity search plays a crucial role in various information access domains, where users seek information about specific entities. Despite significant research efforts to improve entity search methods, the availability of large-scale resources and extensible frameworks has been limiting progress. In this work, we present LaQuE (Large-scale Queries for Entity search), a curated framework for entity search, which includes a reproducible and extensible code base as well as a large relevance judgment collection consisting of real-user queries based on the ORCAS collection. LaQuE is industry-scale and suitable for training complex neural models for entity search. We develop methods for curating and judging entity collections, as well as training entity search methods based on LaQuE. We additionally establish strong baselines within LaQuE based on various retrievers, including traditional bag-of-words-based methods and neural-based models. We show that training neural entity search models on LaQuE enhances retrieval effectiveness compared to the state-of-the-art. Additionally, we categorize the released queries in LaQuE based on their popularity and difficulty, encouraging research on more challenging queries for the entity search task. We publicly release LaQuE at \url{https://anonymous.4open.science/r/LaQuE-0CDD/}.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Entity search plays a crucial role in various information access domains, where users seek information about specific entities. Despite significant research efforts to improve entity search methods, the availability of large-scale resources and extensible frameworks has been limiting progress. In this work, we present LaQuE (Large-scale Queries for Entity search), a curated framework for entity search, which includes a reproducible and extensible code base as well as a large relevance judgment collection consisting of real-user queries based on the ORCAS collection. LaQuE is industry-scale and suitable for training complex neural models for entity search. We develop methods for curating and judging entity collections, as well as training entity search methods based on LaQuE. We additionally establish strong baselines within LaQuE based on various retrievers, including traditional bag-of-words-based methods and neural-based models. We show that training neural entity search models on LaQuE enhances retrieval effectiveness compared to the state-of-the-art. Additionally, we categorize the released queries in LaQuE based on their popularity and difficulty, encouraging research on more challenging queries for the entity search task. We publicly release LaQuE at https://anonymous.4open.science/r/LaQuE-0CDD/.
Close
https://ls3.rnet.torontomu.ca/ecir_2024__laque_arabzadeh/
Close
Bigdeli, Amin; Arabzadeh, Negar; Bagheri, Ebrahim
Learning to Jointly Transform and Rank Difficult Queries Proceedings Article
In: The 46th European Conference on Information Retrieval (ECIR 2024), 2024.
Abstract | Links | BibTeX | Tags:
@inproceedings{ecir2024a,
  author    = {Amin Bigdeli and Negar Arabzadeh and Ebrahim Bagheri},
  title     = {Learning to Jointly Transform and Rank Difficult Queries},
  booktitle = {The 46th European Conference on Information Retrieval (ECIR 2024)},
  year      = {2024},
  date      = {2024-01-01},
  url       = {https://ls3.rnet.torontomu.ca/ecir_2024___query_transform_bigdeli/},
  urldate   = {2024-01-01},
  abstract  = {Recent empirical studies have shown that while neural rankers exhibit increasingly higher retrieval effectiveness on tasks such as ad hoc retrieval, these improved performances are not experienced uniformly across the range of all queries. There are typically a large subset of queries that are not satisfied by neural rankers. These queries are often referred to as difficult queries. Since neural rankers operate based on the similarity between the embedding representations of queries and their relevant documents, the poor performance of difficult queries can be due to their sub-optimal learnt representations. Our work in this paper aims to learn to rank documents and also transform query representations in tandem such that the representation of queries are transformed into one that shows higher resemblance to their relevant document. This way, our method will provide the opportunity to satisfy a large number of difficult queries that would otherwise not be addressed. To do so, we propose to integrate two forms of triplet loss functions into neural rankers such that they ensure that each query is moved along the embedding space, through the transformation of its embedding representation, in order to be placed close to its relevant document(s). We perform experiments based on the MS MARCO passage ranking task and show that our proposed method has been able to show noticeable performance improvement for queries that were extremely difficult for existing neural rankers. On average, our approach has been able to satisfy 277 queries with an MRR@10 of 0.21 for queries that had a reciprocal rank of zero on the initial neural ranker.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Recent empirical studies have shown that while neural rankers exhibit increasingly higher retrieval effectiveness on tasks such as ad hoc retrieval, these improved performances are not experienced uniformly across the range of all queries. There are typically a large subset of queries that are not satisfied by neural rankers. These queries are often referred to as difficult queries. Since neural rankers operate based on the similarity between the embedding representations of queries and their relevant documents, the poor performance of difficult queries can be due to their sub-optimal learnt representations. Our work in this paper aims to learn to rank documents and also transform query representations in tandem such that the representation of queries are transformed into one that shows higher resemblance to their relevant document. This way, our method will provide the opportunity to satisfy a large number of difficult queries that would otherwise not be addressed. To do so, we propose to integrate two forms of triplet loss functions into neural rankers such that they ensure that each query is moved along the embedding space, through the transformation of its embedding representation, in order to be placed close to its relevant document(s). We perform experiments based on the MS MARCO passage ranking task and show that our proposed method has been able to show noticeable performance improvement for queries that were extremely difficult for existing neural rankers. On average, our approach has been able to satisfy 277 queries with an MRR@10 of 0.21 for queries that had a reciprocal rank of zero on the initial neural ranker.
Close
https://ls3.rnet.torontomu.ca/ecir_2024___query_transform_bigdeli/
Close
Arabzadeh, Negar; Meng, Chuan; Aliannejadi, Mohammad; Bagheri, Ebrahim
Query Performance Prediction: From Fundamentals to Advanced Techniques Proceedings Article
In: The 46th European Conference on Information Retrieval (ECIR 2024) (tutorial), 2024.
Abstract | Links | BibTeX | Tags:
@inproceedings{ecir2024e,
  author    = {Negar Arabzadeh and Chuan Meng and Mohammad Aliannejadi and Ebrahim Bagheri},
  title     = {Query Performance Prediction: From Fundamentals to Advanced Techniques},
  booktitle = {The 46th European Conference on Information Retrieval (ECIR 2024) (tutorial)},
  year      = {2024},
  date      = {2024-01-01},
  url       = {https://ls3.rnet.torontomu.ca/ecir2024__tutorial_qpp__arabzadeh/},
  urldate   = {2024-01-01},
  abstract  = {Query performance prediction (QPP) is a core task in information retrieval (IR) that aims at predicting the retrieval quality for a given query without relevance judgments. QPP has been investigated for decades and has witnessed a surge in research activity in recent years; QPP has been shown to benefit various aspects, e.g., improving retrieval effectiveness by selecting the most effective ranking function per query. Despite its importance, there is no recent tutorial to provide a comprehensive overview of QPP techniques in the era of pre-trained/large language models or in the scenario of emerging conversational search (CS); moreover, while research in QPP has yielded promising results, its practical implementation and integration into real-world search engines remain a challenge. In this tutorial, we have three main objectives. First, we aim to disseminate the latest advancements in QPP to the IR community. Second, we go beyond investigating QPP in ad-hoc search and cover QPP for CS. Third, the tutorial offers a unique opportunity to bridge the gap between theory and practice; we aim to equip participants with the essential skills and insights needed to navigate the evolving landscape of QPP, ultimately benefiting both researchers and practitioners in the field of IR and encouraging them to work around the future avenues on QPP.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Query performance prediction (QPP) is a core task in information retrieval (IR) that aims at predicting the retrieval quality for a given query without relevance judgments. QPP has been investigated for decades and has witnessed a surge in research activity in recent years; QPP has been shown to benefit various aspects, e.g., improving retrieval effectiveness by selecting the most effective ranking function per query. Despite its importance, there is no recent tutorial to provide a comprehensive overview of QPP techniques in the era of pre-trained/large language models or in the scenario of emerging conversational search (CS); moreover, while research in QPP has yielded promising results, its practical implementation and integration into real-world search engines remain a challenge. In this tutorial, we have three main objectives. First, we aim to disseminate the latest advancements in QPP to the IR community. Second, we go beyond investigating QPP in ad-hoc search and cover QPP for CS. Third, the tutorial offers a unique opportunity to bridge the gap between theory and practice; we aim to equip participants with the essential skills and insights needed to navigate the evolving landscape of QPP, ultimately benefiting both researchers and practitioners in the field of IR and encouraging them to work around the future avenues on QPP.
Close
https://ls3.rnet.torontomu.ca/ecir2024__tutorial_qpp__arabzadeh/
Close
Arabzadeh, Negar; Ebrahimi, Sajad; Salamat, Sara; Bashari, Mahdi; Bagheri, Ebrahim
Reviewerly: Modeling the Reviewer Assignment Task as an Information Retrieval Problem Proceedings Article
In: 33rd ACM International Conference on Information and Knowledge Management (CIKM 2024), 2024.
Abstract | BibTeX | Tags:
@inproceedings{CIKM2024-1,
  author    = {Negar Arabzadeh and Sajad Ebrahimi and Sara Salamat and Mahdi Bashari and Ebrahim Bagheri},
  title     = {Reviewerly: Modeling the Reviewer Assignment Task as an Information Retrieval Problem},
  booktitle = {33rd ACM International Conference on Information and Knowledge Management (CIKM 2024)},
  year      = {2024},
  date      = {2024-07-17},
  abstract  = {The peer review process is a fundamental aspect of academic publishing, ensuring the quality and credibility of scholarly work. In this talk, we will explore the critical challenges associated specifically with the assignment of reviewers to submitted papers. We will introduce Reviewerly, our innovative solution designed to enhance the efficiency and effectiveness of reviewer assignments by leveraging data from diverse sources, including OpenAlex, PubMed, and DBLP. By modeling the reviewer assignment problem as an information retrieval task, we focus on retrieving a pool of relevant and diverse reviewers for each paper. We will highlight the challenges we faced and showcase the benefits of this approach in addressing the reviewer assignment problem.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
The peer review process is a fundamental aspect of academic publishing, ensuring the quality and credibility of scholarly work. In this talk, we will explore the critical challenges associated specifically with the assignment of reviewers to submitted papers. We will introduce Reviewerly, our innovative solution designed to enhance the efficiency and effectiveness of reviewer assignments by leveraging data from diverse sources, including OpenAlex, PubMed, and DBLP. By modeling the reviewer assignment problem as an information retrieval task, we focus on retrieving a pool of relevant and diverse reviewers for each paper. We will highlight the challenges we faced and showcase the benefits of this approach in addressing the reviewer assignment problem.
Close
2023
Arabzadeh, Negar; Bagheri, Ebrahim
A Self-supervised Language Model Selection Strategy for Biomedical Question Answering Journal Article
In: Journal of Biomedical Informatics, 2023.
Abstract | Links | BibTeX | Tags:
@article{JBI2023,
  author     = {Negar Arabzadeh and Ebrahim Bagheri},
  title      = {A Self-supervised Language Model Selection Strategy for Biomedical Question Answering},
  journal    = {Journal of Biomedical Informatics},
  year       = {2023},
  date       = {2023-09-01},
  urldate    = {2023-09-01},
  url        = {https://ls3.rnet.torontomu.ca/jbi_2023_arabzadeh/},
  abstract   = {Large neural-based Pre-trained Language Models (PLM) have recently gained much attention due to their noteworthy performance in many downstream Information Retrieval (IR) and Natural Language Processing (NLP) tasks. PLMs can be categorized as either general-purpose, which are trained on resources such as large-scale Web corpora, and domain-specific which are trained on in-domain or mixed-domain corpora. While domain-specific PLMs have shown promising performance on domain-specific tasks, they are significantly more computationally expensive compared to general-purpose PLMs as they have to be either retrained or trained from scratch. The objective of our work in this paper is to explore whether it would be possible to leverage general-purpose PLMs to show competitive performance to domain-specific PLMs without the need for expensive retraining of the PLMs for domain-specific tasks. By focusing specifically on the recent BioASQ Biomedical Question Answering task, we show how different general-purpose PLMs show synergistic behaviour in terms of performance, which can lead to overall notable performance improvement when used in tandem with each other. More concretely, given a set of general-purpose PLMs, we propose a self-supervised method for training a classifier that systematically selects the PLM that is most likely to answer the question correctly on a per-input basis. We show that through such a selection strategy, the performance of general-purpose PLMs can become competitive with domain-specific PLMs while remaining computationally light since there is no need to retrain the large language model itself. We run experiments on the BioASQ dataset, which is a large-scale biomedical question-answering benchmark. 
We show that utilizing our proposed selection strategy can show statistically significant performance improvements on general-purpose language models with an average of 16.7% when using only lighter models such as DistilBERT and DistilRoBERTa, as well as 14.2% improvement when using relatively larger models such as BERT and RoBERTa and so, their performance become competitive with domain-specific large language models such as PubMedBERT.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}

Close
Large neural-based Pre-trained Language Models (PLM) have recently gained much attention due to their noteworthy performance in many downstream Information Retrieval (IR) and Natural Language Processing (NLP) tasks. PLMs can be categorized as either general-purpose, which are trained on resources such as large-scale Web corpora, and domain-specific which are trained on in-domain or mixed-domain corpora. While domain-specific PLMs have shown promising performance on domain-specific tasks, they are significantly more computationally expensive compared to general-purpose PLMs as they have to be either retrained or trained from scratch. The objective of our work in this paper is to explore whether it would be possible to leverage general-purpose PLMs to show competitive performance to domain-specific PLMs without the need for expensive retraining of the PLMs for domain-specific tasks. By focusing specifically on the recent BioASQ Biomedical Question Answering task, we show how different general-purpose PLMs show synergistic behaviour in terms of performance, which can lead to overall notable performance improvement when used in tandem with each other. More concretely, given a set of general-purpose PLMs, we propose a self-supervised method for training a classifier that systematically selects the PLM that is most likely to answer the question correctly on a per-input basis. We show that through such a selection strategy, the performance of general-purpose PLMs can become competitive with domain-specific PLMs while remaining computationally light since there is no need to retrain the large language model itself. We run experiments on the BioASQ dataset, which is a large-scale biomedical question-answering benchmark. 
We show that utilizing our proposed selection strategy can show statistically significant performance improvements on general-purpose language models with an average of 16.7% when using only lighter models such as DistilBERT and DistilRoBERTa, as well as 14.2% improvement when using relatively larger models such as BERT and RoBERTa and so, their performance become competitive with domain-specific large language models such as PubMedBERT.
Close
https://ls3.rnet.torontomu.ca/jbi_2023_arabzadeh/
Close
Rad, Radin Hamidi; Fani, Hossein; Bagheri, Ebrahim; Kargar, Mehdi; Srivastava, Divesh; Szlichta, Jaroslaw
A Variational Neural Architecture for Skill-based Team Formation Journal Article
In: ACM Transactions on Information Systems, 2023.
Abstract | Links | BibTeX | Tags:
@article{TOIS2023,
title = {A Variational Neural Architecture for Skill-based Team Formation},
author = {Radin Hamidi Rad and Hossein Fani and Ebrahim Bagheri and Mehdi Kargar and Divesh Srivastava and Jaroslaw Szlichta},
url = {https://ls3.rnet.torontomu.ca/tois_2023_hamidrad/},
year = {2023},
date = {2023-03-12},
urldate = {2023-03-12},
journal = {ACM Transactions on Information Systems},
abstract = {Team formation is concerned with the identification of a group of experts who have a high likelihood of
effectively collaborating with each other in order to satisfy a collection of input skills. Solutions to this task
have mainly adopted graph operations and at least have the following limitations: (1) they are computationally
demanding as they require finding shortest paths on large collaboration networks; (2) they use various types of
heuristics to reduce the exploration space over the collaboration network in order to become practically feasible;
therefore, their results are not necessarily optimal; and, (3) they are not well-suited for collaboration network
structures given the sparsity of these networks. Our work proposes a variational Bayesian neural network
architecture that learns representations for teams whose members have collaborated with each other in the past.
The learnt representations allow our proposed approach to mine teams that have a past collaborative history
and collectively cover the requested desirable set of skills. Through our experiments, we demonstrate that our
approach shows stronger performance compared to a range of strong team formation techniques from both
quantitative and qualitative perspectives.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Team formation is concerned with the identification of a group of experts who have a high likelihood of
effectively collaborating with each other in order to satisfy a collection of input skills. Solutions to this task
have mainly adopted graph operations and at least have the following limitations: (1) they are computationally
demanding as they require finding shortest paths on large collaboration networks; (2) they use various types of
heuristics to reduce the exploration space over the collaboration network in order to become practically feasible;
therefore, their results are not necessarily optimal; and, (3) they are not well-suited for collaboration network
structures given the sparsity of these networks. Our work proposes a variational Bayesian neural network
architecture that learns representations for teams whose members have collaborated with each other in the past.
The learnt representations allow our proposed approach to mine teams that have a past collaborative history
and collectively cover the requested desirable set of skills. Through our experiments, we demonstrate that our
approach shows stronger performance compared to a range of strong team formation techniques from both
quantitative and qualitative perspectives.
Close
https://ls3.rnet.torontomu.ca/tois_2023_hamidrad/
Close
Nguyen, Hoang; Rad, Radin Hamidi; Zarrinkalam, Fattane; Bagheri, Ebrahim
DyHNet: Learning Dynamic Heterogeneous Network Representations Journal Article
In: Information Sciences, 2023.
Abstract | Links | BibTeX | Tags:
@article{InS2023DyHNet,
title = {DyHNet: Learning Dynamic Heterogeneous Network Representations},
author = {Hoang Nguyen and Radin Hamidi Rad and Fattane Zarrinkalam and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/wp-content/uploads/2024/04/v1_covered.pdf},
doi = {10.1016/j.ins.2023.119371},
year = {2023},
date = {2023-07-05},
urldate = {2023-07-05},
journal = {Information Sciences},
abstract = {Many real-world networks, such as social networks, contain structural heterogeneity and experience temporal evolution. However, while there has been growing literature on network representation learning, only a few have addressed the need to learn representations for dynamic heterogeneous networks. The objective of our work in this paper is to introduce DyHNet, which learns representations for such networks and distinguishes itself from the state-of-the-art by systematically capturing (1) local node semantics, (2) global network semantics and (3) longer-range temporal associations between network snapshots when learning network representations. Through experiments on four real-world datasets from different domains, namely IMDB with 4,178 movies, AMiner with 10,674 papers, Yelp with 2,693 businesses, and DBLP with 14,376 papers, we demonstrate that our proposed method is able to show consistently better and more robust performance compared to the state-of-the-art techniques on link prediction and node classification tasks. More specifically, we are superior to the best baseline in the temporal link prediction task by approximately 13% and 15% on F1-score for the IMDB and AMiner datasets, respectively. Further, in the node classification task, our findings illustrate that the Micro F1 scores of our proposed model increase by 13% and 17% compared to the runner-up model on the
Yelp and DBLP datasets, respectively.},
keywords = {Dynamic heterogeneous network, Network representation learning, Random walk},
pubstate = {published},
tppubtype = {article}
}

Close
Many real-world networks, such as social networks, contain structural heterogeneity and experience temporal evolution. However, while there has been growing literature on network representation learning, only a few have addressed the need to learn representations for dynamic heterogeneous networks. The objective of our work in this paper is to introduce DyHNet, which learns representations for such networks and distinguishes itself from the state-of-the-art by systematically capturing (1) local node semantics, (2) global network semantics and (3) longer-range temporal associations between network snapshots when learning network representations. Through experiments on four real-world datasets from different domains, namely IMDB with 4,178 movies, AMiner with 10,674 papers, Yelp with 2,693 businesses, and DBLP with 14,376 papers, we demonstrate that our proposed method is able to show consistently better and more robust performance compared to the state-of-the-art techniques on link prediction and node classification tasks. More specifically, we are superior to the best baseline in the temporal link prediction task by approximately 13% and 15% on F1-score for the IMDB and AMiner datasets, respectively. Further, in the node classification task, our findings illustrate that the Micro F1 scores of our proposed model increase by 13% and 17% compared to the runner-up model on the
Yelp and DBLP datasets, respectively.
Close
https://ls3.rnet.torontomu.ca/wp-content/uploads/2024/04/v1_covered.pdf
doi:https://doi.org/10.1016/j.ins.2023.119371
Close
Rad, Radin Hamidi; Nguyen, Hoang; Al-Obeidat, Feras; Bagheri, Ebrahim; Kargar, Mehdi; Srivastava, Divesh; Szlichta, Jaroslaw; Zarrinkalam, Fattane
Learning Heterogeneous Subgraph Representations for Team Discovery Journal Article Forthcoming
In: Information Retrieval Journal, Forthcoming.
Abstract | Links | BibTeX | Tags:
@article{IRJ2023,
title = {Learning Heterogeneous Subgraph Representations for Team Discovery},
author = {Radin Hamidi Rad and Hoang Nguyen and Feras Al-Obeidat and Ebrahim Bagheri and Mehdi Kargar and Divesh Srivastava and Jaroslaw Szlichta and Fattane Zarrinkalam},
url = {https://ls3.rnet.torontomu.ca/irj_2023_hamidi/},
doi = {10.1007/s10791-023-09421-6},
year = {2023},
date = {2023-09-01},
urldate = {2023-09-01},
journal = {Information Retrieval Journal},
abstract = {The team discovery task is concerned with finding a group of experts from a collaboration network who would collectively cover a desirable set of skills. Most prior work for team discovery either adopt graph-based or neural mapping approaches. Graph-based approaches are computationally intractable often leading to sub-optimal team selection. Neural mapping approaches have better performance, however, are still limited as they learn individual representations for skills and
experts and are often prone to overfitting given the sparsity of collaboration networks. Thus, we define the team discovery task as one of learning subgraph representations from a heterogeneous collaboration network where the subgraphs represent teams which are then used to identify relevant teams for a given set of skills. As such, our approach captures local (node interactions with each team) and global (subgraph interactions between teams) characteristics of the representation network and allows us to easily map between any homogeneous and heterogeneous subgraphs in the network to effectively discover teams. Our experiments over two real-world datasets from different domains, namely DBLP bibliographic dataset with 10,647 papers and IMDB with 4,882 movies, illustrate that our approach outperforms the state-of-the-art baselines on a range of ranking and quality metrics. More
specifically, in terms of ranking metrics, we are superior to the best baseline by approximately 15% on the DBLP dataset and by approximately 20% on the IMDB dataset. Further, our findings illustrate that our approach consistently shows a robust performance improvement over the baselines.},
keywords = {},
pubstate = {forthcoming},
tppubtype = {article}
}

Close
The team discovery task is concerned with finding a group of experts from a collaboration network who would collectively cover a desirable set of skills. Most prior work for team discovery either adopt graph-based or neural mapping approaches. Graph-based approaches are computationally intractable often leading to sub-optimal team selection. Neural mapping approaches have better performance, however, are still limited as they learn individual representations for skills and
experts and are often prone to overfitting given the sparsity of collaboration networks. Thus, we define the team discovery task as one of learning subgraph representations from a heterogeneous collaboration network where the subgraphs represent teams which are then used to identify relevant teams for a given set of skills. As such, our approach captures local (node interactions with each team) and global (subgraph interactions between teams) characteristics of the representation network and allows us to easily map between any homogeneous and heterogeneous subgraphs in the network to effectively discover teams. Our experiments over two real-world datasets from different domains, namely DBLP bibliographic dataset with 10,647 papers and IMDB with 4,882 movies, illustrate that our approach outperforms the state-of-the-art baselines on a range of ranking and quality metrics. More
specifically, in terms of ranking metrics, we are superior to the best baseline by approximately 15% on the DBLP dataset and by approximately 20% on the IMDB dataset. Further, our findings illustrate that our approach consistently shows a robust performance improvement over the baselines.
Close
https://ls3.rnet.torontomu.ca/irj_2023_hamidi/
doi:https://doi.org/10.1007/s10791-023-09421-6
Close
Khodabakhsh, Maryam; Bagheri, Ebrahim
Learning to Rank and Predict: Multi-Task Learning for Ad hoc Retrieval and Query Performance Prediction Journal Article
In: Information Sciences, 2023.
Abstract | Links | BibTeX | Tags:
@article{InS2023M,
title = {Learning to Rank and Predict: Multi-Task Learning for Ad hoc Retrieval and Query Performance Prediction},
author = {Maryam Khodabakhsh and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/is_2023_qpp_khodabakhsh/},
doi = {10.1016/j.ins.2023.119015},
year = {2023},
date = {2023-04-22},
urldate = {2023-04-22},
journal = {Information Sciences},
abstract = {The ad hoc retrieval task aims at ranking relevant documents to a user query such that the most relevant documents are ranked higher compared to less relevant ones. Given the performance of the ad hoc retrieval task can vary across a range of queries, researchers have extensively explored the interrelated task of query performance prediction, which aims at estimating the quality of the search results for a user query without having access to relevance judgments. Traditionally and to-date, the two tasks have been explored as separate tasks where ad hoc retrieval and query performance prediction have been performed in isolation. In this paper, we propose to learn joint tasks that would perform ad hoc retrieval and at the same time predict the quality of the produced rankings. More specifically, we propose a multi-task learning approach, called Multi-task Query Performance Prediction Framework (M-QPPF), which learns document ranking and query performance prediction tasks simultaneously. In M-QPPF, we adopt a shared BERT layer, which is fine-tuned to learn representations for query-document pairs in the embedding space such that the representations effectively encode the cross-interaction between the query and documents. We perform comprehensive experiments against state-of-the-art methods over large scale datasets.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
The ad hoc retrieval task aims at ranking relevant documents to a user query such that the most relevant documents are ranked higher compared to less relevant ones. Given the performance of the ad hoc retrieval task can vary across a range of queries, researchers have extensively explored the interrelated task of query performance prediction, which aims at estimating the quality of the search results for a user query without having access to relevance judgments. Traditionally and to-date, the two tasks have been explored as separate tasks where ad hoc retrieval and query performance prediction have been performed in isolation. In this paper, we propose to learn joint tasks that would perform ad hoc retrieval and at the same time predict the quality of the produced rankings. More specifically, we propose a multi-task learning approach, called Multi-task Query Performance Prediction Framework (M-QPPF), which learns document ranking and query performance prediction tasks simultaneously. In M-QPPF, we adopt a shared BERT layer, which is fine-tuned to learn representations for query-document pairs in the embedding space such that the representations effectively encode the cross-interaction between the query and documents. We perform comprehensive experiments against state-of-the-art methods over large scale datasets.
Close
https://ls3.rnet.torontomu.ca/is_2023_qpp_khodabakhsh/
doi:https://doi.org/10.1016/j.ins.2023.119015
Close
Salamat, Sara; Arabzadeh, Negar; Seyedsalehi, Shirin; Bigdeli, Amin; Zihayat, Morteza; Bagheri, Ebrahim
Neural Disentanglement of Query Difficulty and Semantics Conference
The 32nd ACM International Conference on Information and Knowledge Management (CIKM 2023), 2023.
Links | BibTeX | Tags:
@conference{CIKM2023-1,
title = {Neural Disentanglement of Query Difficulty and Semantics},
author = {Sara Salamat and Negar Arabzadeh and Shirin Seyedsalehi and Amin Bigdeli and Morteza Zihayat and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/cikm_2023_disentanglement_salamat/},
doi = {10.1145/3583780.3615189},
year = {2023},
date = {2023-09-01},
urldate = {2023-09-01},
booktitle = {The 32nd ACM International Conference on Information and Knowledge Management (CIKM 2023)},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}

Close
https://ls3.rnet.torontomu.ca/cikm_2023_disentanglement_salamat/
doi:https://doi.org/10.1145/3583780.3615189
Close
Arabzadeh, Negar; Rad, Radin Hamidi; Khodabakhsh, Maryam; Bagheri, Ebrahim
Noisy Perturbations for Estimating Query Difficulty in Dense Retrievers Conference
The 32nd ACM International Conference on Information and Knowledge Management (CIKM 2023), 2023.
Abstract | Links | BibTeX | Tags:
@conference{CIKM2023-2,
title = {Noisy Perturbations for Estimating Query Difficulty in Dense Retrievers},
author = {Negar Arabzadeh and Radin Hamidi Rad and Maryam Khodabakhsh and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/cikm2023_dense_qpp__arabzadeh/},
doi = {10.1145/3583780.3615270},
year = {2023},
date = {2023-09-01},
urldate = {2023-09-01},
booktitle = {The 32nd ACM International Conference on Information and Knowledge Management (CIKM 2023)},
abstract = {Query Performance Prediction (QPP), is concerned with assessing the retrieval quality of a ranking method for an input query. Most traditional unsupervised frequency-based models and many recent supervised neural methods have been designed specifically for predicting the performance of sparse retrievers such as BM25. In this paper we propose an unsupervised QPP method for dense neural retrievers which operates by redefining the well-known concept of query robustness i.e., a more robust query to perturbations is an easier query to handle. We propose to generate query perturbations for measuring query robustness by systematically injecting noise into the contextualized neural representation of each query. We then compare the retrieved list for the original query with that of the perturbed query as a way to measure query robustness.
Our experiments on four different query sets including MS MARCO, TREC Deep Learning track 2019 and 2020 and TREC DL-Hard show consistently improved performance on linear and ranking correlation metrics over the state of the art.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}

Close
Query Performance Prediction (QPP), is concerned with assessing the retrieval quality of a ranking method for an input query. Most traditional unsupervised frequency-based models and many recent supervised neural methods have been designed specifically for predicting the performance of sparse retrievers such as BM25. In this paper we propose an unsupervised QPP method for dense neural retrievers which operates by redefining the well-known concept of query robustness i.e., a more robust query to perturbations is an easier query to handle. We propose to generate query perturbations for measuring query robustness by systematically injecting noise into the contextualized neural representation of each query. We then compare the retrieved list for the original query with that of the perturbed query as a way to measure query robustness.
Our experiments on four different query sets including MS MARCO, TREC Deep Learning track 2019 and 2020 and TREC DL-Hard show consistently improved performance on linear and ranking correlation metrics over the state of the art.
Close
https://ls3.rnet.torontomu.ca/cikm2023_dense_qpp__arabzadeh/
doi:10.1145/3583780.3615270
Close
Bigdeli, Amin; Arabzadeh, Negar; Seyedsalehi, Shirin; Mitra, Bhaskar; Zihayat, Morteza; Bagheri, Ebrahim
De-Biasing Relevance Judgements for Fair Ranking Proceedings Article
In: The 45th European Conference on Information Retrieval (ECIR 2023), 2023.
Abstract | Links | BibTeX | Tags:
@inproceedings{Ecir2023d,
title = {De-Biasing Relevance Judgements for Fair Ranking},
author = {Amin Bigdeli and Negar Arabzadeh and Shirin Seyedsalehi and Bhaskar Mitra and Morteza Zihayat and Ebrahim Bagheri},
url = {http://ls3.rnet.ryerson.ca/wp-content/uploads/2023/03/De-biasing-Relevance-Judgements-for-Fair.pdf},
doi = {10.1007/978-3-031-28238-6_24},
year = {2023},
date = {2023-04-02},
urldate = {2023-04-02},
booktitle = {The 45th European Conference on Information Retrieval (ECIR 2023)},
abstract = {The objective of this paper is to show that it is possible to significantly reduce stereotypical gender biases in neural rankers without modifying the ranking loss function, which is the current approach in the literature. We systematically de-bias gold standard relevance judgement datasets with a set of balanced and well-matched query pairs. Such a de-biasing process will expose neural rankers to comparable queries from across gender identities that have associated relevant documents with compatible degrees of gender bias. Therefore, neural rankers will learn not to associate varying degrees of bias to queries from certain gender identities. Our experiments show that our approach is able to (1) systematically reduce gender biases associated with different gender identities, and (2) at the same time maintain the same level of retrieval effectiveness.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
The objective of this paper is to show that it is possible to significantly reduce stereotypical gender biases in neural rankers without modifying the ranking loss function, which is the current approach in the literature. We systematically de-bias gold standard relevance judgement datasets with a set of balanced and well-matched query pairs. Such a de-biasing process will expose neural rankers to comparable queries from across gender identities that have associated relevant documents with compatible degrees of gender bias. Therefore, neural rankers will learn not to associate varying degrees of bias to queries from certain gender identities. Our experiments show that our approach is able to (1) systematically reduce gender biases associated with different gender identities, and (2) at the same time maintain the same level of retrieval effectiveness.
Close
http://ls3.rnet.ryerson.ca/wp-content/uploads/2023/03/De-biasing-Relevance-Judge[...]
doi:https://doi.org/10.1007/978-3-031-28238-6_24
Close
Salamat, Sara; Arabzadeh, Negar; Bigdeli, Amin; Seyedsalehi, Shirin; Zihayat, Morteza; Bagheri, Ebrahim
Don't Raise Your Voice, Improve Your Argument: Learning to Retrieve Convincing Arguments Proceedings Article
In: The 45th European Conference on Information Retrieval (ECIR 2023), 2023.
Abstract | Links | BibTeX | Tags:
@inproceedings{Ecir2023b,
title = {Don't Raise Your Voice, Improve Your Argument: Learning to Retrieve Convincing Arguments},
author = {Sara Salamat and Negar Arabzadeh and Amin Bigdeli and Shirin Seyedsalehi and Morteza Zihayat and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/ecir2023_argument_salamat/},
doi = {10.1007/978-3-031-28238-6_50},
year = {2023},
date = {2023-04-02},
urldate = {2023-04-02},
booktitle = {The 45th European Conference on Information Retrieval (ECIR 2023)},
abstract = {The Information Retrieval community has made strides in developing neural rankers, which have shown strong retrieval effectiveness on large-scale gold standard datasets. The focus of existing neural rankers has primarily been on measuring the relevance of a document or passage to the user query. However, other considerations such as the convincingness of the content are not taken into account when retrieving content. We present a large gold standard dataset, referred to as CoRe, which focuses on enabling researchers to explore the integration of the concepts of convincingness and relevance to allow for the retrieval of relevant yet persuasive content. Through extensive experiments on this dataset, we report that there is a close association between convincingness and relevance that can have practical value in how convincing content is presented and retrieved in practice.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
The Information Retrieval community has made strides in developing neural rankers, which have shown strong retrieval effectiveness on large-scale gold standard datasets. The focus of existing neural rankers has primarily been on measuring the relevance of a document or passage to the user query. However, other considerations such as the convincingness of the content are not taken into account when retrieving content. We present a large gold standard dataset, referred to as CoRe, which focuses on enabling researchers to explore the integration of the concepts of convincingness and relevance to allow for the retrieval of relevant yet persuasive content. Through extensive experiments on this dataset, we report that there is a close association between convincingness and relevance that can have practical value in how convincing content is presented and retrieved in practice.

Close
https://ls3.rnet.torontomu.ca/ecir2023_argument_salamat/
doi:https://doi.org/10.1007/978-3-031-28238-6_50
Close
Salamat, Sara; Arabzadeh, Negar; Zarrinkalam, Fattane; Zihayat, Morteza; Bagheri, Ebrahim
Learning Query-Space Document Representations for High-Recall Retrieval Proceedings Article
In: The 45th European Conference on Information Retrieval (ECIR 2023), 2023.
Abstract | Links | BibTeX | Tags:
@inproceedings{Ecir2023e,
title = {Learning Query-Space Document Representations for High-Recall Retrieval},
author = {Sara Salamat and Negar Arabzadeh and Fattane Zarrinkalam and Morteza Zihayat and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/ecir2023_representations_salamat/},
doi = {10.1007/978-3-031-28238-6_51},
year = {2023},
date = {2023-04-02},
urldate = {2023-04-02},
booktitle = {The 45th European Conference on Information Retrieval (ECIR 2023)},
abstract = {Recent studies have shown that significant performance improvements reported by neural rankers do not necessarily extend to a diverse range of queries. There is a large set of queries that cannot be effectively addressed by neural rankers primarily because relevant documents to these queries are not identified by first-stage retrievers. In this paper, we propose a novel document representation approach that represents documents within the query space, and hence increases the likelihood of recalling a higher number of relevant documents. Based on experiments on the MS MARCO dataset as well as the hardest subset of its queries, we find that the proposed approach shows synergistic behavior to existing neural rankers and is able to increase recall both on MS MARCO dev set queries as well as the hardest queries of MS MARCO.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Recent studies have shown that significant performance improvements reported by neural rankers do not necessarily extend to a diverse range of queries. There is a large set of queries that cannot be effectively addressed by neural rankers primarily because relevant documents to these queries are not identified by first-stage retrievers. In this paper, we propose a novel document representation approach that represents documents within the query space, and hence increases the likelihood of recalling a higher number of relevant documents. Based on experiments on the MS MARCO dataset as well as the hardest subset of its queries, we find that the proposed approach shows synergistic behavior to existing neural rankers and is able to increase recall both on MS MARCO dev set queries as well as the hardest queries of MS MARCO.

Close
https://ls3.rnet.torontomu.ca/ecir2023_representations_salamat/
doi:https://doi.org/10.1007/978-3-031-28238-6_51
Close
Vo, Duc-Thuan; Zarrinkalam, Fattane; Pham, Ba; Arabzadeh, Negar; Salamat, Sara; Bagheri, Ebrahim
Neural Ad hoc Retrieval Meets Information Extraction Proceedings Article
In: The 45th European Conference on Information Retrieval (ECIR 2023), 2023.
Abstract | Links | BibTeX | Tags:
@inproceedings{Ecir2023c,
title = {Neural Ad hoc Retrieval Meets Information Extraction},
author = {Duc-Thuan Vo and Fattane Zarrinkalam and Ba Pham and Negar Arabzadeh and Sara Salamat and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/ecir_2023_zarrinkalam/},
doi = {10.1007/978-3-031-28238-6_57},
year = {2023},
date = {2023-04-02},
urldate = {2023-04-02},
booktitle = {The 45th European Conference on Information Retrieval (ECIR 2023)},
abstract = {This paper presents the idea of systematically integrating relation triples derived from Open Information Extraction (OpenIE) with neural rankers in order to improve the performance of the ad-hoc retrieval task. This is motivated by two reasons: (1) to capture longer-range semantic associations between keywords in documents, which would not otherwise be immediately identifiable by neural rankers; and (2) identify closely mentioned yet semantically unrelated content in the document that could lead to a document being incorrectly considered to be relevant for the query. Through our extensive experiments on three widely used TREC collections, we show that our idea consistently leads to noticeable performance improvements for neural rankers on a range of metrics.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
This paper presents the idea of systematically integrating relation triples derived from Open Information Extraction (OpenIE) with neural rankers in order to improve the performance of the ad-hoc retrieval task. This is motivated by two reasons: (1) to capture longer-range semantic associations between keywords in documents, which would not otherwise be immediately identifiable by neural rankers; and (2) identify closely mentioned yet semantically unrelated content in the document that could lead to a document being incorrectly considered to be relevant for the query. Through our extensive experiments on three widely used TREC collections, we show that our idea consistently leads to noticeable performance improvements for neural rankers on a range of metrics.

Close
https://ls3.rnet.torontomu.ca/ecir_2023_zarrinkalam/
doi:https://doi.org/10.1007/978-3-031-28238-6_57
Close
Arabzadeh, Negar; Bigdeli, Amin; Rad, Radin Hamidi; Bagheri, Ebrahim
Quantifying Ranker Coverage of Different Query Subspaces Proceedings Article
In: 46th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2023), 2023.
Abstract | Links | BibTeX | Tags:
@inproceedings{SIGIR2023,
title = {Quantifying Ranker Coverage of Different Query Subspaces},
author = {Negar Arabzadeh and Amin Bigdeli and Radin Hamidi Rad and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/sigir2023__tasc_arabzadeh/},
doi = {10.1145/3539618.3592045},
year = {2023},
date = {2023-04-05},
urldate = {2023-04-05},
booktitle = {46th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2023)},
abstract = {The information retrieval community has observed significant performance improvements over various tasks due to the introduction of neural architectures. However, such improvements do not necessarily seem to have happened uniformly across a range of queries. As we will empirically show in this paper, the performance of neural rankers follow a long-tail distribution where there are many subsets of queries, which are not effectively satisfied by neural methods. Despite this observation, performance is often reported using standard retrieval metrics, such as MRR or nDCG, which capture average performance over all queries. As such, it is not clear whether reported improvements are due to incremental boost on a small subset of already well-performing queries or addressing queries that have been difficult to address by existing methods. In this paper, we propose the Task Subspace Coverage (TaSC /tAHsk/) metric, which systematically quantifies whether and to what extent improvements in retrieval effectiveness happen on similar or disparate query subspaces for different rankers. Our experiments show that the consideration of our proposed TaSC metric in conjunction with existing ranking metrics provides deeper insight into ranker performance and their contribution to overall advances on a given task.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
The information retrieval community has observed significant performance improvements over various tasks due to the introduction of neural architectures. However, such improvements do not necessarily seem to have happened uniformly across a range of queries. As we will empirically show in this paper, the performance of neural rankers follow a long-tail distribution where there are many subsets of queries, which are not effectively satisfied by neural methods. Despite this observation, performance is often reported using standard retrieval metrics, such as MRR or nDCG, which capture average performance over all queries. As such, it is not clear whether reported improvements are due to incremental boost on a small subset of already well-performing queries or addressing queries that have been difficult to address by existing methods. In this paper, we propose the Task Subspace Coverage (TaSC /tAHsk/) metric, which systematically quantifies whether and to what extent improvements in retrieval effectiveness happen on similar or disparate query subspaces for different rankers. Our experiments show that the consideration of our proposed TaSC metric in conjunction with existing ranking metrics provides deeper insight into ranker performance and their contribution to overall advances on a given task.
Close
https://ls3.rnet.torontomu.ca/sigir2023__tasc_arabzadeh/
doi:https://doi.org/10.1145/3539618.3592045
Close
Bigdeli, Amin; Arabzadeh, Negar; Seyedsalehi, Shirin; Zihayat, Morteza; Bagheri, Ebrahim
Understanding and Mitigating Gender Bias in Information Retrieval Systems Proceedings Article
In: The 45th European Conference on Information Retrieval (ECIR 2023) (tutorial), 2023.
Links | BibTeX | Tags:
@inproceedings{Ecir2023a,
title = {Understanding and Mitigating Gender Bias in Information Retrieval Systems},
author = {Amin Bigdeli and Negar Arabzadeh and Shirin Seyedsalehi and Morteza Zihayat and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/ecir2023_tutorial_bigdeli/},
doi = {10.1007/978-3-031-28241-6_32},
year = {2023},
date = {2023-04-02},
urldate = {2023-04-02},
booktitle = {The 45th European Conference on Information Retrieval (ECIR 2023) (tutorial)},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
https://ls3.rnet.torontomu.ca/ecir2023_tutorial_bigdeli/
doi:https://doi.org/10.1007/978-3-031-28241-6_32
Close
2022
Mahdavimoghaddam, Jalehsadat; Bahuguna, Ayush; Bagheri, Ebrahim
Exploring the Utility of Social Content for Understanding Future In-Demand Skills Journal Article
In: Proc. ACM Hum. Comput. Interact. (CSCW), 2022.
Abstract | Links | BibTeX | Tags:
@article{cscw2022,
title = {Exploring the Utility of Social Content for Understanding Future In-Demand Skills},
author = {Jalehsadat Mahdavimoghaddam and Ayush Bahuguna and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/3555114_compressed/},
doi = {10.1145/3555114},
year = {2022},
date = {2022-03-30},
urldate = {2022-03-30},
journal = {Proc. ACM Hum. Comput. Interact. (CSCW)},
abstract = {Rapid technological innovations, especially in the Information Technology space, demand the workforce to be vigilant by acquiring new skills to remain relevant and employable. The workforce needs to be engaged in a continuous lifelong learning process by educating themselves about skills that will be in-demand in the future. To do so, it is important for students, job seekers, and even recruiters to know which skills will be in demand in the future to invest time and resources in these skills. On this basis, the main objective of this paper is to investigate whether social content can offer insight into potential future in-demand skills in the IT job market. Based on the analysis of social content from Reddit and Job Posting data from Dice and Monster websites, we find that social content related to job skills are strong indicators for future in-demand skills. We further find that specific social content associated with recruitment-related topics are stronger indicators of future skills. Our findings encourage learners and job seekers to pay close attention to online social content to forward plan new skills and maximize their employability.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Rapid technological innovations, especially in the Information Technology space, demand the workforce to be vigilant by acquiring new skills to remain relevant and employable. The workforce needs to be engaged in a continuous lifelong learning process by educating themselves about skills that will be in-demand in the future. To do so, it is important for students, job seekers, and even recruiters to know which skills will be in demand in the future to invest time and resources in these skills. On this basis, the main objective of this paper is to investigate whether social content can offer insight into potential future in-demand skills in the IT job market. Based on the analysis of social content from Reddit and Job Posting data from Dice and Monster websites, we find that social content related to job skills are strong indicators for future in-demand skills. We further find that specific social content associated with recruitment-related topics are stronger indicators of future skills. Our findings encourage learners and job seekers to pay close attention to online social content to forward plan new skills and maximize their employability.

Close
https://ls3.rnet.torontomu.ca/3555114_compressed/
doi:https://doi.org/10.1145/3555114
Close
Hosseini, Hawre; Mansouri, Mehran; Bagheri, Ebrahim
A Systemic Functional Linguistics Approach to Implicit Entity Recognition in Tweets Journal Article
In: Information Processing and Management, vol. 59, iss. 4, no. 102957, 2022.
Abstract | Links | BibTeX | Tags:
@article{ipm2020-hawre,
title = {A Systemic Functional Linguistics Approach to Implicit Entity Recognition in Tweets},
author = {Hawre Hosseini and Mehran Mansouri and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/1-s2-0-s0306457322000772-main/},
doi = {10.1016/j.ipm.2022.102957},
year = {2022},
date = {2022-04-23},
urldate = {2022-04-23},
journal = {Information Processing and Management},
volume = {59},
number = {4},
pages = {102957},
abstract = {The identification of knowledge graph entity mentions in textual content has already attracted much attention. The major assumption of existing work is that entities are explicitly mentioned in text and would only need to be disambiguated and linked. However, this assumption does not necessarily hold for social content where a significant portion of information is implied. The focus of our work in this paper is to identify whether textual social content include implicit mentions of knowledge graph entities or not, hence forming a two-class classification problem. To this end, we adopt the systemic functional linguistic framework that allows for capturing meaning expressed through language. Based on this theoretical framework we systematically introduce two classes of features, namely syntagmatic and paradigmatic features, for implicit entity recognition. In our experiments, we show the utility of these features for the task, report on ablation studies, measure the impact of each feature subset on each other and also provide a detailed error analysis of our technique.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
The identification of knowledge graph entity mentions in textual content has already attracted much attention. The major assumption of existing work is that entities are explicitly mentioned in text and would only need to be disambiguated and linked. However, this assumption does not necessarily hold for social content where a significant portion of information is implied. The focus of our work in this paper is to identify whether textual social content include implicit mentions of knowledge graph entities or not, hence forming a two-class classification problem. To this end, we adopt the systemic functional linguistic framework that allows for capturing meaning expressed through language. Based on this theoretical framework we systematically introduce two classes of features, namely syntagmatic and paradigmatic features, for implicit entity recognition. In our experiments, we show the utility of these features for the task, report on ablation studies, measure the impact of each feature subset on each other and also provide a detailed error analysis of our technique.

Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0306457322000772-main/
doi:https://doi.org/10.1016/j.ipm.2022.102957
Close
Etemadi, Roohollah; Zihayat, Morteza; Feng, Kuan; Adelman, Jason; Bagheri, Ebrahim
Embedding-based Team Formation for Community Question Answering Journal Article
In: Information Sciences, 2022.
Abstract | Links | BibTeX | Tags:
@article{Etemadi2022,
title = {Embedding-based Team Formation for Community Question Answering},
author = {Roohollah Etemadi and Morteza Zihayat and Kuan Feng and Jason Adelman and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/1-s2-0-s0020025522010829-main/},
doi = {10.1016/j.ins.2022.09.036},
year = {2022},
date = {2022-09-09},
urldate = {2022-09-09},
journal = {Information Sciences},
abstract = {Finding a qualified individual who can independently answer a question on a community question answering platform is becoming more challenging due to the increasing multidisciplinary nature of posted questions. As such, finding a group of experts to collaboratively answer the questions is of paramount importance. To this end, our proposed method forms teams of experts who can collectively answer new questions. Our approach, called team2box, learns neural embedding representations based on the content of the posted questions, experts’ engagement with these questions, and past expert collaboration history in order to form a team to answer the posted question. It embeds experts and questions as points and existing teams as regions within the embedding space. Such an approach allows team2box to form a team whose members (1) collectively cover the knowledge required to answer a question, (2) have successful past experience in jointly answering similar questions, and (3) can work efficiently together to answer the question. Extensive experiments on real-life datasets from Stack Exchange show that team2box outperforms the state-of-the-art by discovering teams with on average 38.97\% more covering the skills required to answer new questions and employing experts with collectively a high expertise level.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Finding a qualified individual who can independently answer a question on a community question answering platform is becoming more challenging due to the increasing multidisciplinary nature of posted questions. As such, finding a group of experts to collaboratively answer the questions is of paramount importance. To this end, our proposed method forms teams of experts who can collectively answer new questions. Our approach, called team2box, learns neural embedding representations based on the content of the posted questions, experts’ engagement with these questions, and past expert collaboration history in order to form a team to answer the posted question. It embeds experts and questions as points and existing teams as regions within the embedding space. Such an approach allows team2box to form a team whose members (1) collectively cover the knowledge required to answer a question, (2) have successful past experience in jointly answering similar questions, and (3) can work efficiently together to answer the question. Extensive experiments on real-life datasets from Stack Exchange show that team2box outperforms the state-of-the-art by discovering teams with on average 38.97% more covering the skills required to answer new questions and employing experts with collectively a high expertise level.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0020025522010829-main/
doi:https://doi.org/10.1016/j.ins.2022.09.036
Close
Sorkhani, Soroosh; Bigdeli, Amin; Etemadi, Roohollah; Zihayat, Morteza; Bagheri, Ebrahim
Feature-based Question Routing in Community Question Answering Platforms Journal Article
In: Information Sciences, 2022.
Abstract | Links | BibTeX | Tags:
@article{INS-2022b,
title = {Feature-based Question Routing in Community Question Answering Platforms},
author = {Soroosh Sorkhani and Amin Bigdeli and Roohollah Etemadi and Morteza Zihayat and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/1-s2-0-s0020025522006661-main_compressed/},
doi = {10.1016/j.ins.2022.06.072},
year = {2022},
date = {2022-06-19},
urldate = {2022-06-19},
journal = {Information Sciences},
abstract = {Community question answering (CQA) platforms are receiving increased attention and are becoming an indispensable source of information in different domains ranging from board games to physics. The success of these platforms depends on how efficiently new questions are assigned to community experts, known as question routing. In this paper, we address the problem of question routing by adopting a learning to rank approach over five CQA websites in the context of which we introduce 74 features and systematically classify them into content-based and social-based categories. Our extensive experiments on datasets from five real online question answering websites indicate that content-based features related to tags and topics as well as social features that are related to user characteristics and user temporality are effective for question routing. Our work shows the ability to improve performance compared to the state-of-the-art neural matchmaking methods that lack the interpretability offered by our work. The improvement can be as high as on average 2.74\% and 2.27\% in terms of common ranking metrics, Normalized Discounted Cumulative Gain (NDCG) and Mean Average Precision (MAP) respectively, compared to our best baselines.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Community question answering (CQA) platforms are receiving increased attention and are becoming an indispensable source of information in different domains ranging from board games to physics. The success of these platforms depends on how efficiently new questions are assigned to community experts, known as question routing. In this paper, we address the problem of question routing by adopting a learning to rank approach over five CQA websites in the context of which we introduce 74 features and systematically classify them into content-based and social-based categories. Our extensive experiments on datasets from five real online question answering websites indicate that content-based features related to tags and topics as well as social features that are related to user characteristics and user temporality are effective for question routing. Our work shows the ability to improve performance compared to the state-of-the-art neural matchmaking methods that lack the interpretability offered by our work. The improvement can be as high as on average 2.74% and 2.27% in terms of common ranking metrics, Normalized Discounted Cumulative Gain (NDCG) and Mean Average Precision (MAP) respectively, compared to our best baselines.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0020025522006661-main_compressed/
doi:https://doi.org/10.1016/j.ins.2022.06.072
Close
Azimy, Hamid; Ghorbani, Ali A.; Bagheri, Ebrahim
Preventing Proof-of-Work Mining Attacks Journal Article
In: Information Sciences, 2022.
Abstract | Links | BibTeX | Tags:
@article{INS-2022c,
title = {Preventing Proof-of-Work Mining Attacks},
author = {Hamid Azimy and Ali A. Ghorbani and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/1-s2-0-s0020025522007241-main/},
doi = {10.1016/j.ins.2022.07.035},
year = {2022},
date = {2022-07-07},
urldate = {2022-07-07},
journal = {Information Sciences},
abstract = {Bitcoin mining is the process of generating new blocks in the Bitcoin blockchain. This process is vulnerable to different types of attacks. One of the most famous attacks in this category is selfish mining. This attack is essentially a strategy that a sufficiently powerful mining pool can follow to obtain more revenue than its fair share. The reason that selfish mining is effective is the difficulty adjustment algorithm used in the Bitcoin network. In this paper, we analyze the profitability of selfish mining with respect to time and propose an alternative difficulty adjustment algorithm that discourages selfish mining while allowing the Bitcoin network to remain scalable. We analyze our proposed solution, present the results, and discuss its effectiveness. Based on our analysis, our proposed algorithm effectively increases the profitability waiting time for the attackers to almost double its original value. For example, for a miner with 40\% of the network's hash power, the algorithm extends the waiting time from 4 weeks to more than 11 weeks. This will discourage attackers from performing their malicious activities. We also show that our proposed algorithm allows the network to scale while it increases the waiting time.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Bitcoin mining is the process of generating new blocks in the Bitcoin blockchain. This process is vulnerable to different types of attacks. One of the most famous attacks in this category is selfish mining. This attack is essentially a strategy that a sufficiently powerful mining pool can follow to obtain more revenue than its fair share. The reason that selfish mining is effective is the difficulty adjustment algorithm used in the Bitcoin network. In this paper, we analyze the profitability of selfish mining with respect to time and propose an alternative difficulty adjustment algorithm that discourages selfish mining while allowing the Bitcoin network to remain scalable. We analyze our proposed solution, present the results, and discuss its effectiveness. Based on our analysis, our proposed algorithm effectively increases the profitability waiting time for the attackers to almost double its original value. For example, for a miner with 40% of the network's hash power, the algorithm extends the waiting time from 4 weeks to more than 11 weeks. This will discourage attackers from performing their malicious activities. We also show that our proposed algorithm allows the network to scale while it increases the waiting time.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0020025522007241-main/
doi:https://doi.org/10.1016/j.ins.2022.07.035
Close
Khodabakhsh, Maryam; Bagheri, Ebrahim
Qualitative Measures for Ad hoc Table Retrieval Journal Article
In: Information Sciences, 2022.
Abstract | Links | BibTeX | Tags:
@article{INS-2022a,
title = {Qualitative Measures for Ad hoc Table Retrieval},
author = {Maryam Khodabakhsh and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/1-s2-0-s0020025522005126-main/},
doi = {10.1016/j.ins.2022.05.080},
year = {2022},
date = {2022-05-23},
urldate = {2022-05-23},
journal = {Information Sciences},
abstract = {The focus of our work is the ad hoc table retrieval task, which aims to rank a list of structured tabular objects in response to a user query. Given the importance of this task, various methods have already been proposed in the literature that focus on syntactic, semantic and neural representations of tables for determining table relevance. However, recent works have highlighted queries that are consistently difficult for baseline methods to satisfy, referred to as hard queries. For this reason, the objectives of this paper include: (1) effectively satisfying hard queries by proposing three classes of qualitative measures, namely coherence, interpretability and exactness, (2) offering a systematic approach to interpolate these three classes of measures with each other and with baseline table retrieval methods, and (3) performing extensive experiments using a range of baseline retrieval methods to show the feasibility of the proposed measures for hard queries. We demonstrate that the consideration of the proposed qualitative measures will lead to improved performance for hard queries on a range of state-of-the-art ad hoc table retrieval baselines. We further show that our proposed measures are synergistic and will lead to even higher performance improvements over the baselines when interpolated with each other. The improvements measure up to 22.94\% on the Semantic Table Retrieval (STR) method with an NDCG@20 of 0.5, which is superior to the performance of any state-of-the-art baseline for hard queries in the ad hoc table retrieval task.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
The focus of our work is the ad hoc table retrieval task, which aims to rank a list of structured tabular objects in response to a user query. Given the importance of this task, various methods have already been proposed in the literature that focus on syntactic, semantic and neural representations of tables for determining table relevance. However, recent works have highlighted queries that are consistently difficult for baseline methods to satisfy, referred to as hard queries. For this reason, the objectives of this paper include: (1) effectively satisfying hard queries by proposing three classes of qualitative measures, namely coherence, interpretability and exactness, (2) offering a systematic approach to interpolate these three classes of measures with each other and with baseline table retrieval methods, and (3) performing extensive experiments using a range of baseline retrieval methods to show the feasibility of the proposed measures for hard queries. We demonstrate that the consideration of the proposed qualitative measures will lead to improved performance for hard queries on a range of state-of-the-art ad hoc table retrieval baselines. We further show that our proposed measures are synergistic and will lead to even higher performance improvements over the baselines when interpolated with each other. The improvements measure up to 22.94% on the Semantic Table Retrieval (STR) method with an NDCG@20 of 0.5, which is superior to the performance of any state-of-the-art baseline for hard queries in the ad hoc table retrieval task.

Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0020025522005126-main/
doi:https://doi.org/10.1016/j.ins.2022.05.080
Close
Mirlohi, Amin; Mahdavimoghaddam, Jalehsadat; Jovanovic, Jelena; Al-Obeidat, Feras; Ghorbani, Ali A; Bagheri, Ebrahim
Social Alignment Contagion in Online Social Networks Journal Article
In: IEEE Transactions on Computational Social Systems, 2022.
Abstract | Links | BibTeX | Tags:
@article{Mirlohi2022,
title = {Social Alignment Contagion in Online Social Networks},
author = {Amin Mirlohi and Jalehsadat Mahdavimoghaddam and Jelena Jovanovic and Feras Al-Obeidat and Ali A Ghorbani and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/social_alignment_contagion_in_online_social_networks_compressed/},
doi = {10.1109/TCSS.2022.3226346},
year = {2022},
date = {2022-11-29},
urldate = {2022-11-29},
journal = {IEEE Transactions on Computational Social Systems},
abstract = {Researchers have already observed social contagion effects in both in-person and online interactions. However, such studies have primarily focused on users’ beliefs, mental states, and interests. In this article, we expand the state of the art by exploring the impact of social contagion on social alignment, i.e., whether the decision to socially align oneself with the general opinion of the users on the social network is contagious to one’s connections on the network or not. The novelty of our work in this article includes: 1) unlike earlier work, this article is among the first to explore the contagiousness of the concept of social alignment on social networks; 2) our work adopts an instrumental variable approach to determine reliable causal relations between observed social contagion effects on the social network; and 3) our work expands beyond the mere presence of contagion in social alignment and also explores the role of population heterogeneity on social alignment contagion. Based on the systematic collection and analysis of data from two large social network platforms, namely, Twitter and Foursquare, we find that a user’s decision to socially align or distance from social topics and sentiments influences the social alignment decisions of their connections on the social network. We further find that such social alignment decisions are significantly impacted by population heterogeneity.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Researchers have already observed social contagion effects in both in-person and online interactions. However, such studies have primarily focused on users’ beliefs, mental states, and interests. In this article, we expand the state of the art by exploring the impact of social contagion on social alignment, i.e., whether the decision to socially align oneself with the general opinion of the users on the social network is contagious to one’s connections on the network or not. The novelty of our work in this article includes: 1) unlike earlier work, this article is among the first to explore the contagiousness of the concept of social alignment on social networks; 2) our work adopts an instrumental variable approach to determine reliable causal relations between observed social contagion effects on the social network; and 3) our work expands beyond the mere presence of contagion in social alignment and also explores the role of population heterogeneity on social alignment contagion. Based on the systematic collection and analysis of data from two large social network platforms, namely, Twitter and Foursquare, we find that a user’s decision to socially align or distance from social topics and sentiments influences the social alignment decisions of their connections on the social network. We further find that such social alignment decisions are significantly impacted by population heterogeneity.

Close
https://ls3.rnet.torontomu.ca/social_alignment_contagion_in_online_social_networks_compressed/
doi:https://doi.org/10.1109/TCSS.2022.3226346
Close
Nguyen, Hoang; Rad, Radin Hamidi; Bagheri, Ebrahim
PyDHNet: A Python Library for Dynamic Heterogeneous Network Representation Learning and Evaluation Conference
31st ACM International Conference on Information and Knowledge Management (CIKM 2022), 2022.
Abstract | Links | BibTeX | Tags:
@conference{cikm2022,
title = {{PyDHNet}: A {Python} Library for Dynamic Heterogeneous Network Representation Learning and Evaluation},
author = {Hoang Nguyen and Radin Hamidi Rad and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/3511808-3557181/},
doi = {10.1145/3511808.3557181},
year = {2022},
date = {2022-08-16},
urldate = {2022-08-16},
booktitle = {31st ACM International Conference on Information and Knowledge Management (CIKM 2022)},
abstract = {Network representation learning and its applications have received increasing attention. Due to their various application areas, many research groups have developed a diverse range of software tools and techniques to learn representation for different types of networks. However, to the best of our knowledge, there are limited works that support representation learning for dynamic heterogeneous networks. The work presented in this demonstration paper attempts to fill the gap in this space by developing and publicly releasing an open-source Python library known as, PyDHNet, a Python Library for Dynamic Heterogeneous Network Representation Learning and Evaluation. PyDHNet consists of two main components: dynamic heterogeneous network representation learning and task-specific evaluation. In our paper, we demonstrate that PyDHNet has an extensible architecture, is easy to install (through PIP) and use, and integrates quite seamlessly with other Python libraries. We also show that the implementation for PyDHNet is efficient and enjoys a competitive execution time.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}

Close
Network representation learning and its applications have received increasing attention. Due to their various application areas, many research groups have developed a diverse range of software tools and techniques to learn representation for different types of networks. However, to the best of our knowledge, there are limited works that support representation learning for dynamic heterogeneous networks. The work presented in this demonstration paper attempts to fill the gap in this space by developing and publicly releasing an open-source Python library known as, PyDHNet, a Python Library for Dynamic Heterogeneous Network Representation Learning and Evaluation. PyDHNet consists of two main components: dynamic heterogeneous network representation learning and task-specific evaluation. In our paper, we demonstrate that PyDHNet has an extensible architecture, is easy to install (through PIP) and use, and integrates quite seamlessly with other Python libraries. We also show that the implementation for PyDHNet is efficient and enjoys a competitive execution time.

Close
https://ls3.rnet.torontomu.ca/3511808-3557181/
doi:https://doi.org/10.1145/3511808.3557181
Close
Bigdeli, Amin; Arabzadeh, Negar; SeyedSalehi, Shirin; Zihayat, Morteza; Bagheri, Ebrahim
A Light-weight Strategy for Restraining Gender Biases in Neural Rankers Proceedings Article
In: 44th European Conference on IR Research (ECIR 2022), 2022.
Abstract | Links | BibTeX | Tags:
@inproceedings{ecir2022,
title = {A Light-weight Strategy for Restraining Gender Biases in Neural Rankers},
author = {Amin Bigdeli and Negar Arabzadeh and Shirin SeyedSalehi and Morteza Zihayat and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/ecir2022_a-light-weight-strategy-for-restraining-gender_/},
doi = {10.1007/978-3-030-99739-7_6},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
booktitle = {44th European Conference on IR Research (ECIR 2022)},
abstract = {In light of recent studies that show neural retrieval methods may intensify gender biases during retrieval, the objective of this paper is to propose a simple yet effective sampling strategy for training neural rankers that would allow the rankers to maintain their retrieval effectiveness while reducing gender biases. Our work proposes to consider the degrees of gender bias when sampling documents to be used for training neural rankers. We report our findings on the MS MARCO collection and based on different query datasets released for this purpose in the literature. Our results show that the proposed light-weight strategy is able to show competitive (or even better) performance compared to the state of the art neural architectures specifically designed to reduce gender biases.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
In light of recent studies that show neural retrieval methods may intensify gender biases during retrieval, the objective of this paper is to propose a simple yet effective sampling strategy for training neural rankers that would allow the rankers to maintain their retrieval effectiveness while reducing gender biases. Our work proposes to consider the degrees of gender bias when sampling documents to be used for training neural rankers. We report our findings on the MS MARCO collection and based on different query datasets released for this purpose in the literature. Our results show that the proposed light-weight strategy is able to show competitive (or even better) performance compared to the state of the art neural architectures specifically designed to reduce gender biases.
Close
https://ls3.rnet.torontomu.ca/ecir2022_a-light-weight-strategy-for-restraining-g[...]
doi:https://doi.org/10.1007/978-3-030-99739-7_6
Close
Seyedsalehi, Shirin; Arabzadeh, Negar; Bigdeli, Amin; Zihayat, Morteza; Bagheri, Ebrahim
Addressing Gender-related Performance Disparities in Neural Rankers Proceedings Article
In: The 45th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2022), 2022.
Abstract | Links | BibTeX | Tags:
@inproceedings{sigir2022a,
title = {Addressing Gender-related Performance Disparities in Neural Rankers},
author = {Shirin Seyedsalehi and Negar Arabzadeh and Amin Bigdeli and Morteza Zihayat and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/3477495-3531882-1/},
doi = {10.1145/3477495.3531882},
year = {2022},
date = {2022-04-01},
urldate = {2022-04-01},
booktitle = {The 45th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2022)},
abstract = {While neural rankers continue to show notable performance improvements over a wide variety of information retrieval tasks, there have been recent studies that show such rankers may intensify certain stereotypical biases. In this paper, we investigate whether neural rankers introduce retrieval effectiveness (performance) disparities over queries related to different genders. We specifically study whether there are significant performance differences between male and female queries when retrieved by neural rankers. Through our empirical study over the MS MARCO collection, we find that such performance disparities are notable and that the performance disparities may be due to the difference between how queries and their relevant judgements are collected and distributed for different gendered queries. More specifically, we observe that male queries are more closely associated with their relevant documents compared to female queries and hence neural rankers are able to more easily learn associations between male queries and their relevant documents. We show that it is possible to systematically balance relevance judgment collections in order to reduce performance disparity between different gendered queries without negatively compromising overall model performance.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
While neural rankers continue to show notable performance improvements over a wide variety of information retrieval tasks, there have been recent studies that show such rankers may intensify certain stereotypical biases. In this paper, we investigate whether neural rankers introduce retrieval effectiveness (performance) disparities over queries related to different genders. We specifically study whether there are significant performance differences between male and female queries when retrieved by neural rankers. Through our empirical study over the MS MARCO collection, we find that such performance disparities are notable and that the performance disparities may be due to the difference between how queries and their relevant judgements are collected and distributed for different gendered queries. More specifically, we observe that male queries are more closely associated with their relevant documents compared to female queries and hence neural rankers are able to more easily learn associations between male queries and their relevant documents. We show that it is possible to systematically balance relevance judgment collections in order to reduce performance disparity between different gendered queries without negatively compromising overall model performance.
Close
https://ls3.rnet.torontomu.ca/3477495-3531882-1/
doi:https://doi.org/10.1145/3477495.3531882
Close
Seyedsalehi, Shirin; Bigdeli, Amin; Arabzadeh, Negar; Mitra, Bhaskar; Zihayat, Morteza; Bagheri, Ebrahim
Bias-aware Fair Neural Ranking for Addressing Stereotypical Gender Biases Proceedings Article
In: EDBT/ICDT 2022, 2022.
Abstract | Links | BibTeX | Tags:
@inproceedings{edbt20221,
title = {Bias-aware Fair Neural Ranking for Addressing Stereotypical Gender Biases},
author = {Shirin Seyedsalehi and Amin Bigdeli and Negar Arabzadeh and Bhaskar Mitra and Morteza Zihayat and Ebrahim Bagheri},
url = {https://ls3.rnet.ryerson.ca/wp-content/uploads/2023/03/De-biasing-Relevance-Judgements-for-Fair.pdf},
doi = {10.1007/978-3-031-28238-6_24},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
booktitle = {EDBT/ICDT 2022},
abstract = {Research has shown that neural rankers can pick up and intensify gender biases. The expression of stereotypical gender biases in retrieval systems can lead to their reinforcement in users' beliefs. As such, the objective of this paper is to propose a bias-aware fair ranker that explicitly incorporates a notion of gender bias and hence controls how bias is expressed in documents that are retrieved. The proposed approach is designed such that it learns the notion of relevance between the document and the query from the relevant sampled documents while incorporating the notion of gender bias by penalizing irrelevant biased sampled documents. We show that unlike the state of the art, our approach reduces bias while maintaining retrieval effectiveness over different query sets.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Research has shown that neural rankers can pick up and intensify gender biases. The expression of stereotypical gender biases in retrieval systems can lead to their reinforcement in users' beliefs. As such, the objective of this paper is to propose a bias-aware fair ranker that explicitly incorporates a notion of gender bias and hence controls how bias is expressed in documents that are retrieved. The proposed approach is designed such that it learns the notion of relevance between the document and the query from the relevant sampled documents while incorporating the notion of gender bias by penalizing irrelevant biased sampled documents. We show that unlike the state of the art, our approach reduces bias while maintaining retrieval effectiveness over different query sets.

Close
https://ls3.rnet.ryerson.ca/wp-content/uploads/2023/03/De-biasing-Relevance-Judg[...]
doi:https://doi.org/10.1007/978-3-031-28238-6_24
Close
Bigdeli, Amin; Arabzadeh, Negar; Seyedsalehi, Shirin; Zihayat, Morteza; Bagheri, Ebrahim
Gender Fairness in Information Retrieval Systems Proceedings Article
In: 45th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2022) (tutorial), 2022.
Abstract | Links | BibTeX | Tags:
@inproceedings{sigir2022b,
title = {Gender Fairness in Information Retrieval Systems},
author = {Amin Bigdeli and Negar Arabzadeh and Shirin Seyedsalehi and Morteza Zihayat and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/3477495-3532680-1/},
doi = {10.1145/3477495.3532680},
year = {2022},
date = {2022-04-16},
urldate = {2022-04-16},
booktitle = {45th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2022) (tutorial)},
abstract = {Recent studies have shown that it is possible for stereotypical gender biases to find their way into representational and algorithmic aspects of retrieval methods; hence, exhibit themselves in retrieval outcomes. In this tutorial, we inform the audience of various studies that have systematically reported the presence of stereotypical gender biases in Information Retrieval (IR) systems. We further classify existing work on gender biases in IR systems as being related to (1) relevance judgement datasets, (2) structure of retrieval methods, and (3) representations learnt for queries and documents. We present how each of these components can be impacted by or cause intensified biases during retrieval. Based on these identified issues, we then present a collection of approaches from the literature that have discussed how such biases can be measured, controlled, or mitigated. Additionally, we introduce publicly available datasets that are often used for investigating gender biases in IR systems as well as evaluation methodology adopted for determining the utility of gender bias mitigation strategies.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Recent studies have shown that it is possible for stereotypical gender biases to find their way into representational and algorithmic aspects of retrieval methods; hence, exhibit themselves in retrieval outcomes. In this tutorial, we inform the audience of various studies that have systematically reported the presence of stereotypical gender biases in Information Retrieval (IR) systems. We further classify existing work on gender biases in IR systems as being related to (1) relevance judgement datasets, (2) structure of retrieval methods, and (3) representations learnt for queries and documents. We present how each of these components can be impacted by or cause intensified biases during retrieval. Based on these identified issues, we then present a collection of approaches from the literature that have discussed how such biases can be measured, controlled, or mitigated. Additionally, we introduce publicly available datasets that are often used for investigating gender biases in IR systems as well as evaluation methodology adopted for determining the utility of gender bias mitigation strategies.

Close
https://ls3.rnet.torontomu.ca/3477495-3532680-1/
doi:https://doi.org/10.1145/3477495.3532680
Close
Rad, Radin Hamidi; SeyedSalehi, Shirin; Kargar, Mehdi; Zihayat, Morteza; Ebrahim Bagheri,
Neural Approach to Forming Coherent Teams in Collaboration Networks Proceedings Article
In: EDBT/ICDT 2022, 2022.
Abstract | Links | BibTeX | Tags:
@inproceedings{edbt-icdt20222,
title = {Neural Approach to Forming Coherent Teams in Collaboration Networks},
author = {Radin Hamidi Rad and Shirin SeyedSalehi and Mehdi Kargar and Morteza Zihayat and Ebrahim Bagheri},
url = {https://openproceedings.org/2022/conf/edbt/paper-135.pdf},
doi = {10.48786/edbt.2022.37},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
booktitle = {EDBT/ICDT 2022},
abstract = {We study team formation whose goal is to form a team of experts who collectively cover a set of desirable skills. This problem has mainly been addressed either through graph search techniques, which look for subgraphs that satisfy a set of skill requirements, or through neural architectures that learn a mapping from the skill space to the expert space. An exact graph-based solution to this problem is intractable and its heuristic variants are only able to identify sub-optimal solutions. On the other hand, neural architecture-based solutions treat experts individually without concern for team dynamics. In this paper, we address the task of forming coherent teams and propose a neural approach that maximizes the likelihood of successful collaboration among team members while maximizing the coverage of the required skills by the team. Our extensive experiments show that the proposed approach outperforms the state-of-the-art methods in terms of both ranking and quality metrics.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
We study team formation whose goal is to form a team of experts who collectively cover a set of desirable skills. This problem has mainly been addressed either through graph search techniques, which look for subgraphs that satisfy a set of skill requirements, or through neural architectures that learn a mapping from the skill space to the expert space. An exact graph-based solution to this problem is intractable and its heuristic variants are only able to identify sub-optimal solutions. On the other hand, neural architecture-based solutions treat experts individually without concern for team dynamics. In this paper, we address the task of forming coherent teams and propose a neural approach that maximizes the likelihood of successful collaboration among team members while maximizing the coverage of the required skills by the team. Our extensive experiments show that the proposed approach outperforms the state-of-the-art methods in terms of both ranking and quality metrics.

Close
https://openproceedings.org/2022/conf/edbt/paper-135.pdf
doi:https://doi.org/10.48786/edbt.2022.37
Close
Klasnja, Anja; Arabzadeh, Negar; Mehrvarz, Mahbod; Bagheri, Ebrahim
On the Characteristics of Ranking-based Gender Bias Measures Proceedings Article
In: WebSci'22, 2022.
Abstract | Links | BibTeX | Tags:
@inproceedings{websci22a,
title = {On the Characteristics of Ranking-based Gender Bias Measures},
author = {Anja Klasnja and Negar Arabzadeh and Mahbod Mehrvarz and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/3501247-3531540-1/},
doi = {10.1145/3501247.3531540},
year = {2022},
date = {2022-03-30},
urldate = {2022-03-30},
booktitle = {The 14th International ACM Conference on Web Science (WebSci'22)},
venue = {Universitat Pompeu Fabra, Barcelona, Spain},
eventdate = {2022-06-26/2022-06-29},
abstract = {With increased recent awareness on the possible impact of retrieval techniques on intensifying gender biases, researchers have embarked on defining quantifiable gender bias metrics that can provide the means to concretely measure such biases in practice. While successful in allowing for identifying possible sources of gender bias, there has been little work that systematically explores the characteristics of these metrics. This paper argues that effective future works on gender biases in information retrieval require a careful understanding of the bias metrics in terms of their consistency, robustness, sensitivity and also their relation with psychological characteristics and what they actually measure. Through our experiments, we show that more rigorous work on gender bias metrics need to be pursued as existing metrics may not necessarily be consistent and robust and often capture differing psychological characteristics.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
With increased recent awareness on the possible impact of retrieval techniques on intensifying gender biases, researchers have embarked on defining quantifiable gender bias metrics that can provide the means to concretely measure such biases in practice. While successful in allowing for identifying possible sources of gender bias, there has been little work that systematically explores the characteristics of these metrics. This paper argues that effective future works on gender biases in information retrieval require a careful understanding of the bias metrics in terms of their consistency, robustness, sensitivity and also their relation with psychological characteristics and what they actually measure. Through our experiments, we show that more rigorous work on gender bias metrics need to be pursued as existing metrics may not necessarily be consistent and robust and often capture differing psychological characteristics.
Close
https://ls3.rnet.torontomu.ca/3501247-3531540-1/
doi:https://doi.org/10.1145/3501247.3531540
Close
Rad, Radin Hamidi; Bagheri, Ebrahim; Kargar, Mehdi; Srivastava, Divesh; Szlichta, Jaroslaw
Subgraph Representation Learning for Team Mining Proceedings Article
In: WebSci'22, 2022.
Abstract | Links | BibTeX | Tags:
@inproceedings{websci22b,
title = {Subgraph Representation Learning for Team Mining},
author = {Radin Hamidi Rad and Ebrahim Bagheri and Mehdi Kargar and Divesh Srivastava and Jaroslaw Szlichta},
url = {https://ls3.rnet.torontomu.ca/3501247-3531578/},
doi = {10.1145/3501247.3531578},
year = {2022},
date = {2022-03-30},
urldate = {2022-03-30},
booktitle = {The 14th International ACM Conference on Web Science (WebSci'22)},
venue = {Universitat Pompeu Fabra, Barcelona, Spain},
eventdate = {2022-06-26/2022-06-29},
abstract = {Team mining is concerned with the identification of a group of experts that are able to collaborate with each other in order to collectively cover a set of required skills. This problem has mainly been addressed either through graph search, which looks for subgraphs that satisfy the skill requirements or through neural architectures that learn a mapping from the skill space to the expert space. An exact graph-based solution to this problem is intractable and its heuristic variants are only able to identify sub-optimal solutions. On the other hand, neural architecture-based solutions are prone to overfitting and simplistically reduce the problem of team formation to one of expert ranking. Our work in this paper proposes an unsupervised heterogeneous skip-gram-based subgraph mining approach that can learn representations for subgraphs in a collaboration network. Unlike previous work, the subgraph representations allow our method to mine teams that have past collaborative history and collectively cover the requested desirable skills. Through our experiments, we demonstrate that our proposed approach is able to outperform a host of state-of-the-art team mining techniques from both quantitative and qualitative perspectives.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Team mining is concerned with the identification of a group of experts that are able to collaborate with each other in order to collectively cover a set of required skills. This problem has mainly been addressed either through graph search, which looks for subgraphs that satisfy the skill requirements or through neural architectures that learn a mapping from the skill space to the expert space. An exact graph-based solution to this problem is intractable and its heuristic variants are only able to identify sub-optimal solutions. On the other hand, neural architecture-based solutions are prone to overfitting and simplistically reduce the problem of team formation to one of expert ranking. Our work in this paper proposes an unsupervised heterogeneous skip-gram-based subgraph mining approach that can learn representations for subgraphs in a collaboration network. Unlike previous work, the subgraph representations allow our method to mine teams that have past collaborative history and collectively cover the requested desirable skills. Through our experiments, we demonstrate that our proposed approach is able to outperform a host of state-of-the-art team mining techniques from both quantitative and qualitative perspectives.
Close
https://ls3.rnet.torontomu.ca/3501247-3531578/
doi:https://doi.org/10.1145/3501247.3531578
Close
2021
Pourgholamali, Fatemeh; Kahani, Mohsen; Noorian, Zeinab; Bagheri, Ebrahim
Learning Product Representations for Generating Reviews for Cold Products Journal Article
In: Knowledge-based Systems, 2021.
Abstract | Links | BibTeX | Tags:
@article{kbs21,
title = {Learning Product Representations for Generating Reviews for Cold Products},
author = {Fatemeh Pourgholamali and Mohsen Kahani and Zeinab Noorian and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/1-s2-0-s095070512100544x-main/},
doi = {10.1016/j.knosys.2021.107282},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
journal = {Knowledge-based Systems},
abstract = {Existing work in the literature have shown that the number and quality of product ratings and reviews have a direct correlation with the product purchase rates in online e-commerce portals. However, the majority of the products on e-commerce portals do not have any ratings or reviews and are known as cold products (∼90% of products on Amazon are cold). As such, there has been growing interest in generating reviews for cold products by selectively transferring reviews from other similar yet warm products. Our work in this paper focuses on this specific problem and generates reviews for cold products through review selection. Similar to existing work in the literature, our work assumes a relationship between product attribute-values and the reviews that products receive. However, unlike the literature, our method (1) is not restricted to the exact surface form of a product attribute name; and, (2) can distinguish between the same attribute expressed in different forms. We achieve these two important characteristics by proposing methods to learn neural product representations that capture the semantics of product attribute-values as they relate to user reviews. More specifically, our work offers (i) an approach to learn neural representations of product attribute-values within a shared embedding space as product reviews; (ii) a weighted composition strategy to develop product representations from the representation of its attributes; and, (iii) a review selection method that selects relevant reviews for the composed product representation within the neural embedding space. We show through our extensive experiments on five datasets consisting of products from CNET.com and movies from rottentomatoes.com that our method is able to show stronger performance compared to several baselines on ROUGE-2 metrics.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Existing work in the literature have shown that the number and quality of product ratings andreviews have a direct correlation with the product purchase rates in online e-commerce portals.However, the majority of the products on e-commerce portals do not have any ratings or reviewsand are known as cold products (∼90% of products on Amazon are cold). As such, there has beengrowing interest in generating reviews for cold products by selectively transferring reviews fromother similar yet warm products. Our work in this paper focuses on this specific problem and gen-erates reviews for cold products through review selection. Similar to existing work in the literature,our work assumes a relationship between product attribute-values and the reviews that productsreceive. However, unlike the literature, our method (1) is not restricted to the exact surface formof a product attribute name; and, (2) can distinguish between the same attribute expressed in dif-ferent forms. We achieve these two important characteristics by proposing methods to learn neuralproduct representations that capture the semantics of product attribute-values as they relate touser reviews. More specifically, our work offers (i) an approach to learn neural representationsof product attribute-values within a shared embedding space as product reviews; (ii) a weightedcomposition strategy to develop product representations from the representation of its attributes;and, (iii) a review selection method that selects relevant reviews for the composed product repre-sentation within the neural embedding space. We show through our extensive experiments on fivedatasets consisting of products fromCNET.comand movies fromrottentomatoes.comthat ourmethod is able to show stronger performance compared to several baselines on ROUGE-2 metrics.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s095070512100544x-main/
doi:https://doi.org/10.1016/j.knosys.2021.107282
Close
Hosseini, Hawre; Bagheri, Ebrahim
Learning to Rank Implicit Entities on Twitter Journal Article
In: Information Processing and Management, 2021.
Abstract | Links | BibTeX | Tags:
@article{ipm2021a,
title = {Learning to Rank Implicit Entities on {Twitter}},
author = {Hawre Hosseini and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/1-s2-0-s0306457321000145-main/},
doi = {10.1016/j.ipm.2021.102503},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
journal = {Information Processing and Management},
abstract = {Linking textual content to entities from the knowledge graph has received increasing attention in the context of which surface form representations of entities, e.g., terms or phrases, are disambiguated and linked to appropriate entities. This allows textual content, e.g., social user-generated content, to be interpreted and reasoned on at a higher semantic level. However, recent research has shown that at least 15% of social user-generated content do not have explicit surface form representation of entities that they discuss. In other words, the subject of the content is only implied. For such cases, existing entity linking methods, known as explicit entity linking, cannot perform linking because entity surface form is missing. In this paper, we investigate how implicit entities within social content can be identified and linked. The contributions of our work include (1) modeling the problem of implicit entity linking as a learn to rank problem where knowledge graph entities are ranked based on their relevance to the input tweet, (2) the introduction and systematic classification of appropriate features for identifying implicit entities, (3) extensive evaluation of the proposed approach in comparison with existing state of the art as well as performing feature analysis over proposed features, and (4) the qualitative assessment of the root causes for mislabeled instances in our experiments and careful discussion on how mislabeled entity links can be addressed as a part of future work. In our experiments, we show that our proposed features are able to improve the state of the art over the standard Precision at 1 (P@1) metric.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Linking textual content to entities from the knowledge graph has received increasing attention in the context of which surface form representations of entities, e.g., terms or phrases, are disambiguated and linked to appropriate entities. This allows textual content, e.g., social user-generated content, to be interpreted and reasoned on at a higher semantic level. However, recent research has shown that at least 15% of social user-generated content do not have explicit surface form representation of entities that they discuss. In other words, the subject of the content is only implied. For such cases, existing entity linking methods, known as explicit entity linking, cannot perform linking because entity surface form is missing. In this paper, we investigate how implicit entities within social content can be identified and linked. The contributions of our work include (1) modeling the problem of implicit entity linking as a learn to rank problem where knowledge graph entities are ranked based on their relevance to the input tweet, (2) the introduction and systematic classification of appropriate features for identifying implicit entities, (3) extensive evaluation of the proposed approach in comparison with existing state of the art as well as performing feature analysis over proposed features, and (4) the qualitative assessment of the root causes for mislabeled instances in our experiments and careful discussion on how mislabeled entity links can be addressed as a part of future work. In our experiments, we show that our proposed features are able to improve the state of the art over the standard Precision at 1 (P@1) metric.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0306457321000145-main/
doi:https://doi.org/10.1016/j.ipm.2021.102503
Close
Mahdavimoghaddam, Jalehsadat; Krishnaswamy, Niranjan; Bagheri, Ebrahim
On the Congruence Between Online Social Content and Future IT Skill Demand Journal Article
In: Proc. ACM Hum. Comput. Interact. , vol. 5(CSCW2), pp. 1-27, 2021.
Abstract | Links | BibTeX | Tags:
@article{cscw21,
title = {On the Congruence Between Online Social Content and Future {IT} Skill Demand},
author = {Jalehsadat Mahdavimoghaddam and Niranjan Krishnaswamy and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/3479511/},
doi = {10.1145/3479511},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
journal = {Proceedings of the ACM on Human-Computer Interaction},
volume = {5},
number = {CSCW2},
pages = {1--27},
abstract = {The speed of digital transformation has resulted in new challenges for job seekers to become lifelong learners and to develop new skills faster than before. In this paper, our main objective is to examine how online content can serve as indicators for changes to the Information Technology (IT) industry and its in-demand skills. To study this relationship, we collect Reddit posts to represent social media content and job postings to reflect the IT industry based on which we explore possible correlations between them. Further, we propose a methodology to quantitatively estimate the predictive power of social media content for future in-demand skills. Our results show that the frequency of skill-related conversations on Reddit correlates with the popularity of skills in job posting data. Additionally, our findings indicate that the number of social posts dedicated to a specific skill can be a strong indicator for future job requirements. This is an important finding because identifying what skills the labor force should acquire will assist job seekers to plan their lifelong learning objectives to (a) maximize their employability, (b) continuously update their skills to remain in demand, and (c) be informed and actively engaged in defining knowledge trends, rather than reactively becoming informed of the latest information.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
The speed of digital transformation has resulted in new challenges for job seekers to become lifelong learners and to develop new skills faster than before. In this paper, our main objective is to examine how online content can serve as indicators for changes to the Information Technology (IT) industry and its in-demand skills. To study this relationship, we collect Reddit posts to represent social media content and job postings to reflect the IT industry based on which we explore possible correlations between them. Further, we propose a methodology to quantitatively estimate the predictive power of social media content for future in-demand skills. Our results show that the frequency of skill-related conversations on Reddit correlates with the popularity of skills in job posting data. Additionally, our findings indicate that the number of social posts dedicated to a specific skill can be a strong indicator for future job requirements. This is an important finding because identifying what skills the labor force should acquire will assist job seekers to plan their lifelong learning objectives to (a) maximize their employability, (b) continuously update their skills to remain in demand, and (c) be informed and actively engaged in defining knowledge trends, rather than reactively becoming informed of the latest information.
Close
https://ls3.rnet.torontomu.ca/3479511/
doi:https://doi.org/10.1145/3479511
Close
Pham, Ba’; Jovanovic, Jelena; Bagheri, Ebrahim; Antony, Jesmin; Ashoor, Huda; Nguyen, Tam T; Rios, Patricia; Robson, Reid C; Thomas, Sonia M; Watt, Jennifer; Straus, Sharon E; Tricco, Andrea C
Text Mining to Support Abstract Screening for Knowledge Syntheses: A Semi-Automated Workflow Journal Article
In: Systematic Reviews, 2021.
Abstract | Links | BibTeX | Tags:
@article{slr2021,
title = {Text Mining to Support Abstract Screening for Knowledge Syntheses: A Semi-Automated Workflow},
author = {Ba’ Pham and Jelena Jovanovic and Ebrahim Bagheri and Jesmin Antony and Huda Ashoor and Tam T Nguyen and Patricia Rios and Reid C Robson and Sonia M Thomas and Jennifer Watt and Sharon E Straus and Andrea C Tricco},
url = {https://ls3.rnet.torontomu.ca/s13643-021-01700-x/},
doi = {10.1186/s13643-021-01700-x},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
journal = {Systematic Reviews},
abstract = {Background: Current text mining tools supporting abstract screening in systematic reviews are not widely used, in part because they lack sensitivity and precision. We set out to develop an accessible, semi-automated “workflow” to conduct abstract screening for systematic reviews and other knowledge synthesis methods. Methods: We adopt widely recommended text-mining and machine-learning methods to 1) process title-abstracts into numerical training data; and 2) train a classification model to predict eligible abstracts. The predicted abstracts are screened by human reviewers for (“true”) eligibility, and the newly eligible abstracts used to identify (using near-neighbor methods) similar abstracts, which are also screened. These abstracts, as well as their eligibility results, are used to update the classification model, and the above steps are iterated until no new eligible abstracts are identified. The workflow was implemented in R and evaluated using a systematic review of insulin-formulations for type-1 diabetes (14,314 abstracts) and a scoping review of knowledge-synthesis methods (17,200 abstracts). Workflow performance was evaluated against the recommended practice of screening abstracts by 2 reviewers, independently. Standard measures were examined: sensitivity (inclusion of all truly eligible abstracts), specificity (exclusion of all truly ineligible abstracts), precision (inclusion of all truly eligible abstracts among all abstracts screened as eligible), F1-score (harmonic average of sensitivity and precision), and accuracy (correctly predicted eligible or ineligible abstracts). Workload reduction was measured as the hours the workflow saved, given only a subset of abstracts needed human screening.
Results: With respect to the systematic and scoping reviews, the workflow attained 88%/89% sensitivity, 99%/99% specificity, 71%/72% precision, an F1-score of 79%/79%, 98%/97% accuracy, 63%/55% workload reduction, with 12%/11% fewer abstracts for full-text retrieval and screening, and 0/6 (1.5%) missed studies in the completed reviews. Conclusion: The workflow was a sensitive, precise, and efficient alternative to the recommended practice of screening abstracts with 2 reviewers. All eligible studies were identified in the first case, while 6 studies (1.5%) were missed in the second that would likely not impact the review’s conclusions. We have described the workflow in language accessible to reviewers with limited exposure to natural language processing and machine learning, and have made the code accessible to reviewers.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Background: Current text mining tools supporting abstract screening in systematic reviews are not widely used, in part because they lack sensitivity and precision. We set out to develop an accessible, semi-automated “workflow” to conduct abstract screening for systematic reviews and other knowledge synthesis methods. Methods: We adopt widely recommended text-mining and machine-learning methods to 1) process title-abstracts into numerical training data; and 2) train a classification model to predict eligible abstracts. The predicted abstracts are screened by human reviewers for (“true”) eligibility, and the newly eligible abstracts used to identify (using near-neighbor methods) similar abstracts, which are also screened. These abstracts, as well as their eligibility results, are used to update the classification model, and the above steps are iterated until no new eligible abstracts are identified. The workflow was implemented in R and evaluated using a systematic review of insulin-formulations for type-1 diabetes (14,314 abstracts) and a scoping review of knowledge-synthesis methods (17,200 abstracts). Workflow performance was evaluated against the recommended practice of screening abstracts by 2 reviewers, independently. Standard measures were examined: sensitivity (inclusion of all truly eligible abstracts), specificity (exclusion of all truly ineligible abstracts), precision (inclusion of all truly eligible abstracts among all abstracts screened as eligible), F1-score (harmonic average of sensitivity and precision), and accuracy (correctly predicted eligible or ineligible abstracts). Workload reduction was measured as the hours the workflow saved, given only a subset of abstracts needed human screening. 
Results: With respect to the systematic and scoping reviews, the workflow attained 88%/89% sensitivity, 99%/99% specificity, 71%/72% precision, an F1-score of 79%/79%, 98%/97% accuracy, 63%/55% workload reduction, with 12%/11% fewer abstracts for full-text retrieval and screening, and 0/6 (1.5%) missed studies in the completed reviews. Conclusion: The workflow was a sensitive, precise, and efficient alternative to the recommended practice of screening abstracts with 2 reviewers. All eligible studies were identified in the first case, while 6 studies (1.5%) were missed in the second that would likely not impact the review’s conclusions. We have described the workflow in language accessible to reviewers with limited exposure to natural language processing and machine learning, and have made the code accessible to reviewers.
Close
https://ls3.rnet.torontomu.ca/s13643-021-01700-x/
doi:https://doi.org/10.1186/s13643-021-01700-x
Close
Fani, Hossein; Tamannaee, Mahtab; Zarrinkalam, Fattane; Samouh, Jamil; Paydar, Samad; Bagheri, Ebrahim
An Extensible Toolkit of Query Refinement Methods and Gold Standard Dataset Generation Proceedings Article
In: 43rd European Conference on IR Research (ECIR 2021), 2021.
Abstract | Links | BibTeX | Tags:
@inproceedings{ecir2021a,
title = {An Extensible Toolkit of Query Refinement Methods and Gold Standard Dataset Generation},
author = {Hossein Fani and Mahtab Tamannaee and Fattane Zarrinkalam and Jamil Samouh and Samad Paydar and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/an-extensible-toolkit-of-query-refinement-methods-and-gold-standard-dataset-generation/},
doi = {10.1007/978-3-030-72240-1_54},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
booktitle = {43rd European Conference on IR Research (ECIR 2021)},
abstract = {We present an open-source extensible python-based toolkit that provides access to a (1) range of built-in unsupervised query expansion methods, and (2) pipeline for generating gold standard datasets for building and evaluating supervised query refinement methods. While the information literature offers abundant work on query expansion techniques, there is yet to be a tool that provides unified access to a comprehensive set of query expansion techniques. The advantage of our proposed toolkit, known as ReQue (refining queries), is that it offers one-stop shop access to query expansion techniques to be used in external information retrieval applications. More importantly, we show how ReQue can be used for building gold standards datasets that can be used for training supervised deep learning-based query refinement techniques. These techniques require sizeable gold query refinement datasets, which are not available in the literature. ReQue provides the means to systematically build such datasets.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
We present an open-source extensible python-based toolkit that provides access to a (1) range of built-in unsupervised query expansion methods, and (2) pipeline for generating gold standard datasets for building and evaluating supervised query refinement methods. While the information literature offers abundant work on query expansion techniques, there is yet to be a tool that provides unified access to a comprehensive set of query expansion techniques. The advantage of our proposed toolkit, known as ReQue (refining queries), is that it offers one-stop shop access to query expansion techniques to be used in external information retrieval applications. More importantly, we show how ReQue can be used for building gold standards datasets that can be used for training supervised deep learning-based query refinement techniques. These techniques require sizeable gold query refinement datasets, which are not available in the literature. ReQue provides the means to systematically build such datasets.
Close
https://ls3.rnet.torontomu.ca/an-extensible-toolkit-of-query-refinement-methods-and-gold-standard-dataset-generation/
doi:https://doi.org/10.1007/978-3-030-72240-1_54
Close
Arabzadeh, Negar; Mahdavi, Jaleh; Bagheri, Ebrahim
Document Specificity Measures for Ad-hoc Retrieval Proceedings Article
In: The 34th Canadian Conference on Artificial Intelligence, 2021.
Abstract | Links | BibTeX | Tags:
@inproceedings{cai2021,
  author    = {Negar Arabzadeh and Jaleh Mahdavi and Ebrahim Bagheri},
  title     = {Document Specificity Measures for Ad-hoc Retrieval},
  booktitle = {The 34th Canadian Conference on Artificial Intelligence},
  year      = {2021},
  date      = {2021-01-01},
  url       = {https://ls3.rnet.torontomu.ca/canadian_ai_2021___document_specificity/},
  urldate   = {2021-01-01},
  abstract  = {When searching, users are interested in accessing the most relevant and specific content related to their information need. Earlier research has shown that it is much easier to retrieve appropriate content for specific queries compared to generic ones as it is possible to discriminatively distinguish the content related to specific queries. The work in this paper builds on earlier findings on query and document specificity and provides a systematic account of ways through which document specificity can be measured. We present a comprehensive view of how various measures of document specificity can be defined and comparatively analyze the utility of various document specificity measures within the context of ad hoc retrieval based on three well-known TREC corpora, namely Robust04, ClueWeb09B, ClueWeb12B and their associated TREC topics. We report on our findings on the effectiveness of each type of document specificity measure.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings},
}

Close
When searching, users are interested in accessing the most relevant and specific content related to their information need. Earlier research has shown that it is much easier to retrieve appropriate content for specific queries compared to generic ones as it is possible to discriminatively distinguish the content related to specific queries. The work in this paper builds on earlier findings on query and document specificity and provides a systematic account of ways through which document specificity can be measured. We present a comprehensive view of how various measures of document specificity can be defined and comparatively analyze the utility of various document specificity measures within the context of ad hoc retrieval based on three well-known TREC corpora, namely Robust04, ClueWeb09B, ClueWeb12B and their associated TREC topics. We report on our findings on the effectiveness of each type of document specificity measure.
Close
https://ls3.rnet.torontomu.ca/canadian_ai_2021___document_specificity/
Close
Bigdeli, Amin; Arabzadeh, Negar; Zihayat, Morteza; Bagheri, Ebrahim
Exploring Gender Biases in Information Retrieval Relevance Judgement Datasets Proceedings Article
In: 43rd European Conference on IR Research (ECIR 2021), 2021.
Abstract | Links | BibTeX | Tags:
@inproceedings{ecir2021b,
title = {Exploring Gender Biases in Information Retrieval Relevance Judgement Datasets},
author = {Amin Bigdeli and Negar Arabzadeh and Morteza Zihayat and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/ecir2021_exploring_gender_biases_compressed/},
doi = {10.1007/978-3-030-72240-1_18},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
booktitle = {43rd European Conference on IR Research (ECIR 2021)},
abstract = {Recent studies in information retrieval have shown that gender biases have found their way into representational and algorithmic aspects of computational models. In this paper, we focus specifically on gender biases in information retrieval gold standard datasets, often referred to as relevance judgements. While not explored in the past, we submit that it is important to understand and measure the extent to which gender biases may be present in information retrieval relevance judgements primarily because relevance judgements are not only the primary source for evaluating IR techniques but are also widely used for training end-to-end neural ranking methods. As such, the presence of bias in relevance judgements would immediately find its way into how retrieval methods operate in practice. Based on a fine-tuned BERT model, we show how queries can be labeled for gender at scale based on which we label MS MARCO queries. We then show how different psychological characteristics are exhibited within documents associated with gendered queries within the relevance judgement datasets. Our observations show that stereotypical biases are prevalent in relevance judgement documents.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Recent studies in information retrieval have shown that gender biases have found their way into representational and algorithmic aspects of computational models. In this paper, we focus specifically on gender biases in information retrieval gold standard datasets, often referred to as relevance judgements. While not explored in the past, we submit that it is important to understand and measure the extent to which gender biases may be present in information retrieval relevance judgements primarily because relevance judgements are not only the primary source for evaluating IR techniques but are also widely used for training end-to-end neural ranking methods. As such, the presence of bias in relevance judgements would immediately find its way into how retrieval methods operate in practice. Based on a fine-tuned BERT model, we show how queries can be labeled for gender at scale based on which we label MS MARCO queries. We then show how different psychological characteristics are exhibited within documents associated with gendered queries within the relevance judgement datasets. Our observations show that stereotypical biases are prevalent in relevance judgement documents.
Close
https://ls3.rnet.torontomu.ca/ecir2021_exploring_gender_biases_compressed/
doi:https://doi.org/10.1007/978-3-030-72240-1_18
Close
Bigdeli, Amin; Arabzadeh, Negar; Seyedsalehi, Shirin; Zihayat, Morteza; Bagheri, Ebrahim
On the Orthogonality of Bias and Effectiveness in Ad hoc Retrieval Proceedings Article
In: The 44th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2021), 2021.
Abstract | Links | BibTeX | Tags:
@inproceedings{sigir2021a,
title = {On the Orthogonality of Bias and Effectiveness in Ad hoc Retrieval},
author = {Amin Bigdeli and Negar Arabzadeh and Shirin Seyedsalehi and Morteza Zihayat and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/3404835-3463110/},
doi = {10.1145/3404835.3463110},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
booktitle = {The 44th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2021)},
abstract = {Various researchers have recently explored the impact of different types of biases on information retrieval tasks such as ad hoc retrieval and question answering. While the impact of bias needs to be controlled in order to avoid increased prejudices, the literature has often viewed the relationship between increased retrieval utility (effectiveness) and reduced bias as a tradeoff where one can suffer from the other. In this paper, we empirically study this tradeoff and explore whether it would be possible to reduce bias while maintaining similar retrieval utility. We show this would be possible by revising the input query through a bias-aware pseudo-relevance feedback framework. We report our findings based on four widely used TREC corpora namely Robust04, Gov2, ClueWeb09 and ClueWeb12 and using two classes of bias metrics. The findings of this paper are significant as they are among the first to show that decrease in bias does not necessarily need to come at the cost of reduced utility.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Various researchers have recently explored the impact of different types of biases on information retrieval tasks such as ad hoc retrieval and question answering. While the impact of bias needs to be controlled in order to avoid increased prejudices, the literature has often viewed the relationship between increased retrieval utility (effectiveness) and reduced bias as a tradeoff where one can suffer from the other. In this paper, we empirically study this tradeoff and explore whether it would be possible to reduce bias while maintaining similar retrieval utility. We show this would be possible by revising the input query through a bias-aware pseudo-relevance feedback framework. We report our findings based on four widely used TREC corpora namely Robust04, Gov2, ClueWeb09 and ClueWeb12 and using two classes of bias metrics. The findings of this paper are significant as they are among the first to show that decrease in bias does not necessarily need to come at the cost of reduced utility.
Close
https://ls3.rnet.torontomu.ca/3404835-3463110/
doi:https://doi.org/10.1145/3404835.3463110
Close
Rad, Radin Hamidi; Bagheri, Ebrahim; Kargar, Mehdi; Srivastava, Divesh; Szlichta, Jaroslaw
Retrieving Skill-Based Teams from Collaboration Networks Proceedings Article
In: The 44th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2021), 2021.
Abstract | Links | BibTeX | Tags:
@inproceedings{sigir2021b,
title = {Retrieving Skill-Based Teams from Collaboration Networks},
author = {Radin Hamidi Rad and Ebrahim Bagheri and Mehdi Kargar and Divesh Srivastava and Jaroslaw Szlichta},
url = {https://ls3.rnet.torontomu.ca/3404835-3463105/},
doi = {10.1145/3404835.3463105},
year = {2021},
date = {2021-01-01},
urldate = {2021-01-01},
booktitle = {The 44th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2021)},
abstract = {Given a set of required skills, the objective of the team formation problem is to form a team of experts that cover the required skills. Most existing approaches are based on graph methods, such as minimum-cost spanning trees. These approaches, due to their limited view of the network, fail to capture complex interactions among experts and are computationally intractable. More recent approaches adopt neural architectures to learn a mapping between the skills and experts space. While they are more effective, these techniques face two main limitations: (1) they consider a fixed representation for both skills and experts, and (2) they overlook the significant amount of past collaboration network information. We learn dense representations for skills and experts based on previous collaborations and bootstrap the training process through transfer learning. We also propose to fine-tune the representation of skills and experts while learning the mapping function. Our experiments over the DBLP dataset verify that our proposed architecture is able to outperform the state-of-the-art graph and neural methods over both ranking and quality metrics.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Given a set of required skills, the objective of the team formation problem is to form a team of experts that cover the required skills. Most existing approaches are based on graph methods, such as minimum-cost spanning trees. These approaches, due to their limited view of the network, fail to capture complex interactions among experts and are computationally intractable. More recent approaches adopt neural architectures to learn a mapping between the skills and experts space. While they are more effective, these techniques face two main limitations: (1) they consider a fixed representation for both skills and experts, and (2) they overlook the significant amount of past collaboration network information. We learn dense representations for skills and experts based on previous collaborations and bootstrap the training process through transfer learning. We also propose to fine-tune the representation of skills and experts while learning the mapping function. Our experiments over the DBLP dataset verify that our proposed architecture is able to outperform the state-of-the-art graph and neural methods over both ranking and quality metrics.
Close
https://ls3.rnet.torontomu.ca/3404835-3463105/
doi:https://doi.org/10.1145/3404835.3463105
Close
2020
Vo, Duc-Thuan; Al-Obeidat, Feras; Bagheri, Ebrahim
Extracting Temporal and Causal Relations based on Event Networks Journal Article
In: Information Processing and Management, 2020.
Abstract | Links | BibTeX | Tags:
@article{ipm2020b,
title = {Extracting Temporal and Causal Relations based on Event Networks},
author = {Duc-Thuan Vo and Feras Al-Obeidat and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/1-s2-0-s0306457320308141-main_compressed/},
doi = {10.1016/j.ipm.2020.102319},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
journal = {Information Processing and Management},
abstract = {Event relations specify how different event flows expressed within the context of a textual passage relate to each other in terms of temporal and causal sequences. There have already been impactful work in the area of temporal and causal event relation extraction; however, the challenge with these approaches is that (1) they are mostly supervised methods and (2) they rely on syntactic and grammatical structure patterns at the sentence-level. In this paper, we address these challenges by proposing an unsupervised event network representation for temporal and causal relation extraction that operates at the document level. More specifically, we benefit from existing Open IE systems to generate a set of triple relations that are then used to build an event network. The event network is bootstrapped by labeling the temporal disposition of events that are directly linked to each other. We then systematically traverse the event network to identify the temporal and causal relations between indirectly connected events. We perform experiments based on the widely adopted TempEval-3 and Causal-TimeBank corpora and compare our work with several strong baselines. We show that our method improves performance compared to several strong methods.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Event relations specify how different event flows expressed within the context of a textual passage relate to each other in terms of temporal and causal sequences. There have already been impactful work in the area of temporal and causal event relation extraction; however, the challenge with these approaches is that (1) they are mostly supervised methods and (2) they rely on syntactic and grammatical structure patterns at the sentence-level. In this paper, we address these challenges by proposing an unsupervised event network representation for temporal and causal relation extraction that operates at the document level. More specifically, we benefit from existing Open IE systems to generate a set of triple relations that are then used to build an event network. The event network is bootstrapped by labeling the temporal disposition of events that are directly linked to each other. We then systematically traverse the event network to identify the temporal and causal relations between indirectly connected events. We perform experiments based on the widely adopted TempEval-3 and Causal-TimeBank corpora and compare our work with several strong baselines. We show that our method improves performance compared to several strong methods.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0306457320308141-main_compressed/
doi:https://doi.org/10.1016/j.ipm.2020.102319
Close
Zarrinkalam, Fattane; Faralli, Stefano; Piao, Guangyuan; Bagheri, Ebrahim
Extracting, Mining and Predicting Users' Interests from Social Media Journal Article
In: Foundations and Trends in Information Retrieval (FnTIR), 2020.
Abstract | Links | BibTeX | Tags:
@article{fntir20,
title = {Extracting, Mining and Predicting Users' Interests from Social Media},
author = {Fattane Zarrinkalam and Stefano Faralli and Guangyuan Piao and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/9781680837391-summary/},
doi = {10.1561/1500000078},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
journal = {Foundations and Trends in Information Retrieval (FnTIR)},
abstract = {The abundance of user generated content on social media provides the opportunity to build models that are able to accurately and effectively extract, mine and predict users' interests with the hopes of enabling more effective user engagement, better quality delivery of appropriate services and higher user satisfaction. While traditional methods for building user profiles relied on AI-based preference elicitation techniques that could have been considered to be intrusive and undesirable by the users, more recent advances are focused on a non-intrusive yet accurate way of determining users' interests and preferences. In this paper, we will cover five important subjects related to the mining of user interests from social media: (1) the foundations of social user interest modeling, such as information sources, various types of representation models and temporal features, (2) techniques that have been adopted or proposed for mining user interests, (3) different evaluation methodologies and benchmark datasets, (4) different applications that have been taking advantage of user interest mining from social media platforms, and (5) existing challenges, open research questions and exciting opportunities for further work.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
The abundance of user generated content on social media provides the opportunity to build models that are able to accurately and effectively extract, mine and predict users' interests with the hopes of enabling more effective user engagement, better quality delivery of appropriate services and higher user satisfaction. While traditional methods for building user profiles relied on AI-based preference elicitation techniques that could have been considered to be intrusive and undesirable by the users, more recent advances are focused on a non-intrusive yet accurate way of determining users' interests and preferences. In this paper, we will cover five important subjects related to the mining of user interests from social media: (1) the foundations of social user interest modeling, such as information sources, various types of representation models and temporal features, (2) techniques that have been adopted or proposed for mining user interests, (3) different evaluation methodologies and benchmark datasets, (4) different applications that have been taking advantage of user interest mining from social media platforms, and (5) existing challenges, open research questions and exciting opportunities for further work.
Close
https://ls3.rnet.torontomu.ca/9781680837391-summary/
doi:http://dx.doi.org/10.1561/1500000078
Close
Arabzadeh, Negar; Zarrinkalam, Fattane; Jovanovic, Jelena; Al-Obeidat, Feras; Bagheri, Ebrahim
Neural Embedding-based Specificity Metrics for Pre-Retrieval Query Performance Prediction Journal Article
In: Information Processing and Management, 2020.
Abstract | Links | BibTeX | Tags:
@article{ipm2020a,
title = {Neural Embedding-based Specificity Metrics for Pre-Retrieval Query Performance Prediction},
author = {Negar Arabzadeh and Fattane Zarrinkalam and Jelena Jovanovic and Feras Al-Obeidat and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/1-s2-0-s030645731931266x-main-3_compressed/},
doi = {10.1016/j.ipm.2020.102248},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
journal = {Information Processing and Management},
abstract = {In information retrieval, the task of query performance prediction (QPP) is concerned with determining in advance the performance of a given query within the context of a retrieval model. QPP has an important role in ensuring proper handling of queries with varying levels of difficulty. Based on the extant literature, query specificity is an important indicator of query performance and is typically estimated using corpus-specific frequency-based specificity metrics. However, such metrics do not consider term semantics and inter-term associations. Our work presented in this paper distinguishes itself by proposing a host of corpus-independent specificity metrics that are based on pre-trained neural embeddings and leverage geometric relations between terms in the embedding space in order to capture the semantics of terms and their interdependencies. Specifically, we propose three classes of specificity metrics based on pre-trained neural embeddings: neighborhood-based, graph-based, and cluster-based metrics. Through two extensive and complementary sets of experiments, we show that the proposed specificity metrics (1) are suitable specificity indicators, based on the gold standards derived from knowledge hierarchies (Wikipedia category hierarchy and DMOZ taxonomy), and (2) have better or competitive performance compared to the state of the art QPP metrics, based on both TREC ad hoc collections namely Robust'04, Gov2 and ClueWeb'09 and ANTIQUE question answering collection. The proposed graph-based specificity metrics, especially those that capture a larger number of inter-term associations, proved to be the most effective in both query specificity estimation and QPP. We have also publicly released two test collections (i.e. specificity gold standards) that we built from the Wikipedia and DMOZ knowledge hierarchies.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
In information retrieval, the task of query performance prediction (QPP) is concerned with determining in advance the performance of a given query within the context of a retrieval model. QPP has an important role in ensuring proper handling of queries with varying levels of difficulty. Based on the extant literature, query specificity is an important indicator of query performance and is typically estimated using corpus-specific frequency-based specificity metrics. However, such metrics do not consider term semantics and inter-term associations. Our work presented in this paper distinguishes itself by proposing a host of corpus-independent specificity metrics that are based on pre-trained neural embeddings and leverage geometric relations between terms in the embedding space in order to capture the semantics of terms and their interdependencies. Specifically, we propose three classes of specificity metrics based on pre-trained neural embeddings: neighborhood-based, graph-based, and cluster-based metrics. Through two extensive and complementary sets of experiments, we show that the proposed specificity metrics (1) are suitable specificity indicators, based on the gold standards derived from knowledge hierarchies (Wikipedia category hierarchy and DMOZ taxonomy), and (2) have better or competitive performance compared to the state of the art QPP metrics, based on both TREC ad hoc collections namely Robust'04, Gov2 and ClueWeb'09 and ANTIQUE question answering collection. The proposed graph-based specificity metrics, especially those that capture a larger number of inter-term associations, proved to be the most effective in both query specificity estimation and QPP. We have also publicly released two test collections (i.e. specificity gold standards) that we built from the Wikipedia and DMOZ knowledge hierarchies.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s030645731931266x-main-3_compressed/
doi:10.1016/j.ipm.2020.102248
Close
Falavarjani, Seyed Amin Mirlohi; Jovanovic, Jelena; Fani, Hossein; Ghorbani, Ali A; Noorian, Zeinab; Bagheri, Ebrahim
On The Causal Relation Between Real World Activities and Emotional Expressions of Social Media Users Journal Article
In: Journal of the Association for Information Science and Technology (JASIST), 2020.
Abstract | Links | BibTeX | Tags:
@article{jasist2020,
title = {On The Causal Relation Between Real World Activities and Emotional Expressions of Social Media Users},
author = {Seyed Amin Mirlohi Falavarjani and Jelena Jovanovic and Hossein Fani and Ali A Ghorbani and Zeinab Noorian and Ebrahim Bagheri},
url = {https://asistdl.onlinelibrary.wiley.com/journal/23301643},
doi = {10.1002/asi.24440},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
journal = {Journal of the Association for Information Science and Technology (JASIST)},
abstract = {Social interactions through online social media have become a daily routine of many, and the number of those whose real-world (offline) and online lives have become intertwined is continuously growing. As such, the interplay of individuals' online and offline activities has been the subject of numerous research studies, the majority of which explored the impact of people's online actions on their offline activities. The opposite direction of impact - the effect of real-world activities on online actions - has also received attention but to a lesser degree. To contribute to the latter form of impact, this paper reports on a quasi-experimental design study that examined the presence of causal relations between real-world activities of online social media users and their online emotional expressions. To this end, we have collected a large dataset (over 17K users) from Twitter and Foursquare, and systematically aligned user content on the two social media platforms. Users' Foursquare check-ins provided information about their offline activities, whereas the users' expressions of emotions and moods were derived from their Twitter posts. Since our study was based on a quasi-experimental design, to minimise the impact of covariates, we applied an innovative model of computing propensity scores. Our main findings can be summarised as follows: (i) users' offline activities do impact their affective expressions, both of emotions and moods, as evidenced in their online shared textual content; (ii) the impact depends on the type of offline activity and if the user embarks on or abandons the activity. Our findings can be used to devise a personalised recommendation mechanism to help people better manage their online emotional expressions.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Social interactions through online social media have become a daily routine of many, and the number of those whose real-world (offline) and online lives have become intertwined is continuously growing. As such, the interplay of individuals' online and offline activities has been the subject of numerous research studies, the majority of which explored the impact of people's online actions on their offline activities. The opposite direction of impact - the effect of real-world activities on online actions - has also received attention but to a lesser degree. To contribute to the latter form of impact, this paper reports on a quasi-experimental design study that examined the presence of causal relations between real-world activities of online social media users and their online emotional expressions. To this end, we have collected a large dataset (over 17K users) from Twitter and Foursquare, and systematically aligned user content on the two social media platforms. Users' Foursquare check-ins provided information about their offline activities, whereas the users' expressions of emotions and moods were derived from their Twitter posts. Since our study was based on a quasi-experimental design, to minimise the impact of covariates, we applied an innovative model of computing propensity scores. Our main findings can be summarised as follows: (i) users' offline activities do impact their affective expressions, both of emotions and moods, as evidenced in their online shared textual content; (ii) the impact depends on the type of offline activity and if the user embarks on or abandons the activity. Our findings can be used to devise a personalised recommendation mechanism to help people better manage their online emotional expressions.
Close
https://asistdl.onlinelibrary.wiley.com/journal/23301643
doi:10.1002/asi.24440
Close
Khodabakhsh, Maryam; Bagheri, Ebrahim
Semantics-enabled Query Performance Prediction for Ad hoc Table Retrieval Journal Article
In: Information Processing and Management, 2020.
Abstract | Links | BibTeX | Tags:
@article{ipm2020f,
title = {Semantics-enabled Query Performance Prediction for Ad hoc Table Retrieval},
author = {Maryam Khodabakhsh and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/1-s2-0-s0306457320308943-main/},
doi = {10.1016/j.ipm.2020.102399},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
journal = {Information Processing and Management},
abstract = {Predicting the performance of a retrieval method for a given query is a highly important and challenging problem in information retrieval. Accurate Query Performance Prediction (QPP) plays an important role in real time handling of queries with varying levels of difficulty. While there have been several successful query performance predictors, no predictors have yet been introduced within the context of the ad hoc table retrieval task, which is concerned with answering a query with a ranked list of tables. In this paper, we propose to perform query performance prediction based on neural embedding techniques for ad hoc table retrieval and introduce three neural features. The neural features are based on neural embedding techniques and leverage the distance between tokens in the embedding space in order to capture their semantic similarity. We evaluate our proposed work based on a gold standard test collection and compare it with the state-of-the-art post-retrieval query performance prediction methods. We find that our neural features (1) are effective for predicting the performance of content-based ranking functions; and not as effective for feature-based ranking functions, and (2) show a synergistic impact on existing QPP methods and hence are able to increase their performance in practice.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Predicting the performance of a retrieval method for a given query is a highly important and challenging problem in information retrieval. Accurate Query Performance Prediction (QPP) plays an important role in real time handling of queries with varying levels of difficulty. While there have been several successful query performance predictors, no predictors have yet been introduced within the context of the ad hoc table retrieval task, which is concerned with answering a query with a ranked list of tables. In this paper, we propose to perform query performance prediction based on neural embedding techniques for ad hoc table retrieval and introduce three neural features. The neural features are based on neural embedding techniques and leverage the distance between tokens in the embedding space in order to capture their semantic similarity. We evaluate our proposed work based on a gold standard test collection and compare it with the state-of-the-art post-retrieval query performance prediction methods. We find that our neural features (1) are effective for predicting the performance of content-based ranking functions; and not as effective for feature-based ranking functions, and (2) show a synergistic impact on existing QPP methods and hence are able to increase their performance in practice.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0306457320308943-main/
doi:10.1016/j.ipm.2020.102399
Close
Bagheri, Ebrahim; Al-Obeidat, Feras
A Latent Model for Ad Hoc Table Retrieval Proceedings Article
In: 42nd European Conference on IR Research (ECIR 2020), 2020.
Abstract | Links | BibTeX | Tags:
@inproceedings{ecir2020b,
title = {A Latent Model for Ad Hoc Table Retrieval},
author = {Ebrahim Bagheri and Feras Al-Obeidat},
url = {https://ls3.rnet.torontomu.ca/978-3-030-45442-5_chapter_11/},
doi = {10.1007/978-3-030-45442-5_11},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
booktitle = {42nd European Conference on IR Research (ECIR 2020)},
abstract = {The ad hoc table retrieval task is concerned with satisfying a query with a ranked list of tables. While there are strong baselines in the literature that exploit learning to rank and semantic matching techniques, there are still a set of hard queries that are difficult for these baseline methods to address. We find that such hard queries are those whose constituting tokens (i.e., terms or entities) are not fully or partially observed in the relevant tables. We focus on proposing a latent factor model to address such hard queries. Our proposed model factorizes the token-table co-occurrence matrix into two low dimensional latent factor matrices that can be used for measuring table and query similarity even if no shared tokens exist between them. We find that the variation of our proposed model that considers keywords provides statistically significant improvement over three strong baselines in terms of NDCG and ERR.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
The ad hoc table retrieval task is concerned with satisfying a query with a ranked list of tables. While there are strong baselines in the literature that exploit learning to rank and semantic matching techniques, there are still a set of hard queries that are difficult for these baseline methods to address. We find that such hard queries are those whose constituting tokens (i.e., terms or entities) are not fully or partially observed in the relevant tables. We focus on proposing a latent factor model to address such hard queries. Our proposed model factorizes the token-table co-occurrence matrix into two low dimensional latent factor matrices that can be used for measuring table and query similarity even if no shared tokens exist between them. We find that the variation of our proposed model that considers keywords provides statistically significant improvement over three strong baselines in terms of NDCG and ERR.
Close
https://ls3.rnet.torontomu.ca/978-3-030-45442-5_chapter_11/
doi:10.1007/978-3-030-45442-5_11
Close
Hosseini, Hawre; Bagheri, Ebrahim
From Explicit to Implicit Entity Linking: A Learn to Rank Framework Proceedings Article
In: 33rd Canadian Conference on Artificial Intelligence, (Canadian AI), 2020.
Abstract | Links | BibTeX | Tags:
@inproceedings{cai2020,
title = {From Explicit to Implicit Entity Linking: A Learn to Rank Framework},
author = {Hawre Hosseini and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/wp-content/uploads/2024/04/10.1007978-3-030-47358-7_compressed-299-305.pdf},
doi = {10.1007/978-3-030-47358-7_28},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
booktitle = {33rd Canadian Conference on Artificial Intelligence, (Canadian AI)},
abstract = {Implicit entity linking is the task of identifying an appropriate entity whose surface form is not explicitly mentioned in the text. Unlike explicit entity linking where an entity is linked to an observed phrase within the input text, implicit entity linking is concerned with determining specific yet implied entities. Existing work in the literature have already identified appropriate features that can be used for ranking relevant entities for explicit entity linking. In this paper, we (1) consider the applicability of such features for implicit entity linking, (2) introduce features that are suited for this task, (3) compare our work with the state of the art in implicit entity linking, and (4) report on feature importance values and error analysis.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Implicit entity linking is the task of identifying an appropriate entity whose surface form is not explicitly mentioned in the text. Unlike explicit entity linking where an entity is linked to an observed phrase within the input text, implicit entity linking is concerned with determining specific yet implied entities. Existing work in the literature have already identified appropriate features that can be used for ranking relevant entities for explicit entity linking. In this paper, we (1) consider the applicability of such features for implicit entity linking, (2) introduce features that are suited for this task, (3) compare our work with the state of the art in implicit entity linking, and (4) report on feature importance values and error analysis.
Close
https://ls3.rnet.torontomu.ca/wp-content/uploads/2024/04/10.1007978-3-030-47358-[...]
doi:10.1007/978-3-030-47358-7_28
Close
Rad, Radin Hamidi; Fani, Hossein; Kargar, Mehdi; Szlichta, Jaroslaw; Bagheri, Ebrahim
Learning to Form Skill-based Teams of Experts Proceedings Article
In: The 29th ACM International Conference on Information and Knowledge Management, (CIKM2020), 2020.
Abstract | Links | BibTeX | Tags:
@inproceedings{cikm-team,
title = {Learning to Form Skill-based Teams of Experts},
author = {Radin Hamidi Rad and Hossein Fani and Mehdi Kargar and Jaroslaw Szlichta and Ebrahim Bagheri},
url = {https://doi.org/10.1145/3340531.3412140},
doi = {10.1145/3340531.3412140},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
booktitle = {The 29th ACM International Conference on Information and Knowledge Management, (CIKM2020)},
abstract = {We focus on the composition of teams of experts that collectively cover a set of required skills based on their historical collaboration network and expertise. Prior works are primarily based on the shortest path between experts on the expert collaboration network, and suffer from three major shortcomings: (1) they are computationally expensive due to the complexity of finding paths on large network structures; (2) they use a small portion of the entire historical collaboration network to reduce the search space; hence, may form sub-optimal teams; and, (3) they fall short in sparse networks where the majority of the experts have only participated in a few teams in the past. Instead of forming a large network of experts, we propose to learn relationships among experts and skills through a variational Bayes neural architecture wherein: i) we consider all past team compositions as training instances to predict future teams; ii) we bring scalability for large networks of experts due to the neural architecture; and, iii) we address sparsity by incorporating uncertainty on the neural network's parameters which yields a richer representation and more accurate team composition. We empirically demonstrate how our proposed model outperforms the state-of-the-art approaches in terms of effectiveness and efficiency.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
We focus on the composition of teams of experts that collectively cover a set of required skills based on their historical collaboration network and expertise. Prior works are primarily based on the shortest path between experts on the expert collaboration network, and suffer from three major shortcomings: (1) they are computationally expensive due to the complexity of finding paths on large network structures; (2) they use a small portion of the entire historical collaboration network to reduce the search space; hence, may form sub-optimal teams; and, (3) they fall short in sparse networks where the majority of the experts have only participated in a few teams in the past. Instead of forming a large network of experts, we propose to learn relationships among experts and skills through a variational Bayes neural architecture wherein: i) we consider all past team compositions as training instances to predict future teams; ii) we bring scalability for large networks of experts due to the neural architecture; and, iii) we address sparsity by incorporating uncertainty on the neural network's parameters which yields a richer representation and more accurate team composition. We empirically demonstrate how our proposed model outperforms the state-of-the-art approaches in terms of effectiveness and efficiency.
Close
https://doi.org/10.1145/3340531.3412140
doi:10.1145/3340531.3412140
Close
Zarrinkalam, Fattane; Faralli, Stefano; Piao, Guangyuan; Bagheri, Ebrahim
Mining User Interests from Social Media Proceedings Article
In: The 29th ACM International Conference on Information and Knowledge Management (tutorial), (CIKM2020), 2020.
Abstract | Links | BibTeX | Tags:
@inproceedings{cikm2020-tutorial,
title = {Mining User Interests from Social Media},
author = {Fattane Zarrinkalam and Stefano Faralli and Guangyuan Piao and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/3340531-3412167/},
doi = {10.1145/3340531.3412167},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
booktitle = {The 29th ACM International Conference on Information and Knowledge Management (tutorial), (CIKM2020)},
abstract = {The abundance of user generated content on social media provides the opportunity to build models that are able to accurately and effectively extract, mine and predict users’ interests with the hopes of enabling more effective user engagement, better quality delivery of appropriate services and higher user satisfaction. While traditional methods for building user profiles relied on AI-based preference elicitation techniques that could have been considered to be intrusive and undesirable by the users, more recent advances are focused on a non-intrusive yet accurate way of determining users’ interests and preferences. In this tutorial, we will cover five important aspects related to the effective mining of user interests: we will introduce (1) the information sources that are used for extracting user interests, (2) the variety of types of user interest profiles that have been proposed in the literature, (3) techniques that have been adopted or proposed for mining user interests, (4) the scalability and resource requirements of the state of the art methods and, finally (5) the evaluation methodologies that are adopted in the literature for validating the appropriateness of the mined user interest profiles. We will also introduce existing challenges, open research questions and exciting opportunities for further work.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
The abundance of user generated content on social media provides the opportunity to build models that are able to accurately and effectively extract, mine and predict users’ interests with the hopes of enabling more effective user engagement, better quality delivery of appropriate services and higher user satisfaction. While traditional methods for building user profiles relied on AI-based preference elicitation techniques that could have been considered to be intrusive and undesirable by the users, more recent advances are focused on a non-intrusive yet accurate way of determining users’ interests and preferences. In this tutorial, we will cover five important aspects related to the effective mining of user interests: we will introduce (1) the information sources that are used for extracting user interests, (2) the variety of types of user interest profiles that have been proposed in the literature, (3) techniques that have been adopted or proposed for mining user interests, (4) the scalability and resource requirements of the state of the art methods and, finally (5) the evaluation methodologies that are adopted in the literature for validating the appropriateness of the mined user interest profiles. We will also introduce existing challenges, open research questions and exciting opportunities for further work.
Close
https://ls3.rnet.torontomu.ca/3340531-3412167/
doi:10.1145/3340531.3412167
Close
Arabzadeh, Negar; Zarrinkalam, Fattane; Jovanovic, Jelena; Bagheri, Ebrahim
Neural Embedding-based Metrics for Pre-Retrieval Query Performance Prediction Proceedings Article
In: 42nd European Conference on IR Research (ECIR 2020), 2020.
Abstract | Links | BibTeX | Tags:
@inproceedings{ecir2020c,
title = {Neural Embedding-based Metrics for Pre-Retrieval Query Performance Prediction},
author = {Negar Arabzadeh and Fattane Zarrinkalam and Jelena Jovanovic and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/wp-content/uploads/2024/04/Neural_Embedding_Based_Metrics_for_Pre_r.pdf},
doi = {10.1007/978-3-030-45442-5_10},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
booktitle = {42nd European Conference on IR Research (ECIR 2020)},
abstract = {Query Performance Prediction (QPP) is concerned with estimating the effectiveness of a query within the context of a retrieval model. It allows for operations such as query routing and segmentation, leading to improved retrieval performance. Pre-retrieval QPP methods are oblivious to the performance of the retrieval model as they predict query difficulty prior to observing the set of documents retrieved for the query. Since neural embedding-based models are showing wider adoption in the information retrieval community, in this paper, we propose a set of pre-retrieval QPP metrics based on the properties of pre-trained neural embeddings and show that such metrics are more effective for performance prediction compared to the widely known QPP metrics such as SCQ, PMI and SCS. We report our findings based on Robust04, ClueWeb09 and Gov2 corpora and their associated TREC topics.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Query Performance Prediction (QPP) is concerned with estimating the effectiveness of a query within the context of a retrieval model. It allows for operations such as query routing and segmentation, leading to improved retrieval performance. Pre-retrieval QPP methods are oblivious to the performance of the retrieval model as they predict query difficulty prior to observing the set of documents retrieved for the query. Since neural embedding-based models are showing wider adoption in the information retrieval community, in this paper, we propose a set of pre-retrieval QPP metrics based on the properties of pre-trained neural embeddings and show that such metrics are more effective for performance prediction compared to the widely known QPP metrics such as SCQ, PMI and SCS. We report our findings based on Robust04, ClueWeb09 and Gov2 corpora and their associated TREC topics.
Close
https://ls3.rnet.torontomu.ca/wp-content/uploads/2024/04/Neural_Embedding_Based_[...]
doi:10.1007/978-3-030-45442-5_10
Close
Tamannaee, Mahtab; Fani, Hossein; Zarrinkalam, Fattane; Samouh, Jamil; Paydar, Samad; Bagheri, Ebrahim
ReQue: A Configurable Workflow and Dataset Collection for Query Refinement Proceedings Article
In: The 29th ACM International Conference on Information and Knowledge Management, (CIKM2020), 2020.
Abstract | Links | BibTeX | Tags:
@inproceedings{cikm-reque,
title = {ReQue: A Configurable Workflow and Dataset Collection for Query Refinement},
author = {Mahtab Tamannaee and Hossein Fani and Fattane Zarrinkalam and Jamil Samouh and Samad Paydar and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/3340531-3412775/},
doi = {10.1145/3340531.3412775},
year = {2020},
date = {2020-01-01},
urldate = {2020-01-01},
booktitle = {The 29th ACM International Conference on Information and Knowledge Management, (CIKM2020)},
abstract = {In this paper, we implement and publicly share a configurable software workflow and a collection of gold standard datasets for training and evaluating supervised query refinement methods. Existing datasets such as AOL and MS MARCO, which have been extensively used in the literature for this purpose, are based on the weak assumption that users’ input queries improve gradually within a search session, i.e., the last query where the user ends her information seeking session is the best reconstructed version of her initial query. In practice, such an assumption is not necessarily accurate for a variety of reasons, e.g., topic drift. The objective of our work is to enable researchers to build gold standard query refinement datasets without having to rely on such weak assumptions. Our software workflow, which generates such gold standard query datasets, takes three inputs: (1) a dataset of queries along with their associated relevance judgements (e.g. TREC topics), (2) an information retrieval method (e.g., BM25), and (3) an evaluation metric (e.g., MAP), and outputs a gold standard dataset. The produced gold standard dataset includes a list of revised queries for each query in the input dataset, each of which effectively improves the performance of the specified retrieval method (e.g., BM25) in terms of the desirable evaluation metric (e.g., MAP). Since our workflow can be used to generate gold standard datasets for any input query set, in this paper, we have generated and publicly shared gold standard datasets for TREC queries associated with Robust04, Gov2, ClueWeb09, and ClueWeb12. The source code of our software workflow, the generated gold datasets, and benchmark results for three state-of-the-art supervised query refinement methods over these datasets are made publicly available for reproducibility purposes.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
In this paper, we implement and publicly share a configurable software workflow and a collection of gold standard datasets for training and evaluating supervised query refinement methods. Existing datasets such as AOL and MS MARCO, which have been extensively used in the literature for this purpose, are based on the weak assumption that users’ input queries improve gradually within a search session, i.e., the last query where the user ends her information seeking session is the best reconstructed version of her initial query. In practice, such an assumption is not necessarily accurate for a variety of reasons, e.g., topic drift. The objective of our work is to enable researchers to build gold standard query refinement datasets without having to rely on such weak assumptions. Our software workflow, which generates such gold standard query datasets, takes three inputs: (1) a dataset of queries along with their associated relevance judgements (e.g. TREC topics), (2) an information retrieval method (e.g., BM25), and (3) an evaluation metric (e.g., MAP), and outputs a gold standard dataset. The produced gold standard dataset includes a list of revised queries for each query in the input dataset, each of which effectively improves the performance of the specified retrieval method (e.g., BM25) in terms of the desirable evaluation metric (e.g., MAP). Since our workflow can be used to generate gold standard datasets for any input query set, in this paper, we have generated and publicly shared gold standard datasets for TREC queries associated with Robust04, Gov2, ClueWeb09, and ClueWeb12. The source code of our software workflow, the generated gold datasets, and benchmark results for three state-of-the-art supervised query refinement methods over these datasets are made publicly available for reproducibility purposes.
Close
https://ls3.rnet.torontomu.ca/3340531-3412775/
doi:10.1145/3340531.3412775
Close
Fani, Hossein; Bagheri, Ebrahim; Du, Weichang
Temporal Latent Space Modeling for Community Prediction Proceedings Article
In: 42nd European Conference on IR Research (ECIR 2020), 2020.
Abstract | Links | BibTeX | Tags:
@inproceedings{ecir2020a,
  title     = {Temporal Latent Space Modeling for Community Prediction},
  author    = {Hossein Fani and Ebrahim Bagheri and Weichang Du},
  url       = {https://ls3.rnet.torontomu.ca/wp-content/uploads/2024/04/978-3-030-45439-5_Chapter_49-2.pdf},
  doi       = {10.1007/978-3-030-45439-5_49},
  year      = {2020},
  date      = {2020-01-01},
  urldate   = {2020-01-01},
  booktitle = {42nd European Conference on IR Research (ECIR 2020)},
  abstract  = {We propose a temporal latent space model for user community prediction in social networks, whose goal is to predict future emerging user communities based on past history of users' topics of interest. Our model assumes that each user lies within an unobserved latent space, and similar users in the latent space representation are more likely to be members of the same user community. The model allows each user to adjust its location in the latent space as her topics of interest evolve over time. Empirically, we demonstrate that our model, when evaluated on a Twitter dataset, outperforms existing approaches under two application scenarios, namely news recommendation and user prediction on a host of metrics such as mrr, ndcg as well as precision and f-measure.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
We propose a temporal latent space model for user community prediction in social networks, whose goal is to predict future emerging user communities based on past history of users' topics of interest. Our model assumes that each user lies within an unobserved latent space, and similar users in the latent space representation are more likely to be members of the same user community. The model allows each user to adjust its location in the latent space as her topics of interest evolve over time. Empirically, we demonstrate that our model, when evaluated on a Twitter dataset, outperforms existing approaches under two application scenarios, namely news recommendation and user prediction on a host of metrics such as mrr, ndcg as well as precision and f-measure.
Close
https://ls3.rnet.torontomu.ca/wp-content/uploads/2024/04/978-3-030-45439-5_Chapt[...]
doi:https://doi.org/10.1007/978-3-030-45439-5_49
Close
Tamannaee, Mahtab; Arabzadeh, Negar; Bagheri, Ebrahim
Vis-Trec: A System for the In-depth Analysis of trec_eval Results Proceedings Article
In: 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval (demo), 2020.
Links | BibTeX | Tags:
@inproceedings{sigir2020,
  title     = {{Vis-Trec}: A System for the In-depth Analysis of {trec\_eval} Results},
  author    = {Mahtab Tamannaee and Negar Arabzadeh and Ebrahim Bagheri},
  url       = {https://ls3.rnet.torontomu.ca/3397271-3401412/},
  doi       = {10.1145/3397271.3401412},
  year      = {2020},
  date      = {2020-01-01},
  urldate   = {2020-01-01},
  booktitle = {43rd International ACM SIGIR Conference on Research and Development in Information Retrieval (demo)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
https://ls3.rnet.torontomu.ca/3397271-3401412/
doi:https://doi.org/10.1145/3397271.3401412
Close
2019
Pourgholamali, Fatemeh; Kahani, Mohsen; Bagheri, Ebrahim
A Neural Graph Embedding Approach for Selecting Review Sentences Journal Article
In: Electronic Commerce Research and Applications, 2019.
Abstract | Links | BibTeX | Tags:
@article{ECRA2019,
  title     = {A Neural Graph Embedding Approach for Selecting Review Sentences},
  author    = {Fatemeh Pourgholamali and Mohsen Kahani and Ebrahim Bagheri},
  url       = {https://ls3.rnet.torontomu.ca/1-s2-0-s1567422319300948-main_compressed-1/},
  doi       = {10.1016/j.elerap.2019.100917},
  year      = {2019},
  date      = {2019-01-01},
  urldate   = {2019-01-01},
  journal   = {Electronic Commerce Research and Applications},
  abstract  = {Product reviews written by the crowd on e-commerce shopping websites have become a critical information source for making purchasing decisions. Researchers have already extensively studied the impact of review sentiments and informativeness on product sales and success, finding a close relationship. An important challenge, however, is that the vast majority of products (e.g., >90\% of products on amazon.com) do not receive enough attention and lack sufficient reviews by the users; hence, they constitute the so-called cold products. One solution to address cold products, which has already been studied in the literature, is to generate reviews for these products by sampling review sentences from closely related warm products. Our method proposed in this paper is specifically focused on such a solution. While a majority of the works in the literature rely on product specification similarity to identify relevant reviews that can be used for review sentence selection, our work differs in that it not only employs product specification similarity but also employs product-review, product-user, and user-review interactions when determining the suitability of a review sentence to be selected. More specifically, the contributions of our work can be enumerated as follows: (1) We propose that the selection of review sentences from other products should not only consider product-product similarity but also consider product-review, user-review, and user-user relationships. As such, we show how neural graph embeddings can be used to encode product, user, and review information into an attributed heterogeneous graph representation based on which similarities can be calculated. (2) We further propose how review relevance and importance can be considered using graph traversal to select appropriate review sentences for a given cold product.
(3) Finally, we systematically compare the performance of our work with those of several state-of-the-art baselines on five datasets collected from CNET.com and rottentomatoes.com with different characteristics from both quantitative (e.g., the Recall-Oriented Understudy for Gisting Evaluation (ROUGE) metrics) and qualitative aspects and show how our proposed approach was able to provide statistically significantly improved performance over various strong baselines.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Product reviews written by the crowd on e-commerce shopping websites have become a critical information source for making purchasing decisions. Researchers have already extensively studied the impact of review sentiments and informativeness on product sales and success, finding a close relationship. An important challenge, however, is that the vast majority of products (e.g., >90% of products on amazon.com) do not receive enough attention and lack sufficient reviews by the users; hence, they constitute the so-called cold products. One solution to address cold products, which has already been studied in the literature, is to generate reviews for these products by sampling review sentences from closely related warm products. Our method proposed in this paper is specifically focused on such a solution. While a majority of the works in the literature rely on product specification similarity to identify relevant reviews that can be used for review sentence selection, our work differs in that it not only employs product specification similarity but also employs product-review, product-user, and user-review interactions when determining the suitability of a review sentence to be selected. More specifically, the contributions of our work can be enumerated as follows: (1) We propose that the selection of review sentences from other products should not only consider product-product similarity but also consider product-review, user-review, and user-user relationships. As such, we show how neural graph embeddings can be used to encode product, user, and review information into an attributed heterogeneous graph representation based on which similarities can be calculated. (2) We further propose how review relevance and importance can be considered using graph traversal to select appropriate review sentences for a given cold product.
(3) Finally, we systematically compare the performance of our work with those of several state-of-the-art baselines on five datasets collected from CNET.com and rottentomatoes.com with different characteristics from both quantitative (e.g., the Recall-Oriented Understudy for Gisting Evaluation (ROUGE) metrics) and qualitative aspects and show how our proposed approach was able to provide statistically significantly improved performance over various strong baselines.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s1567422319300948-main_compressed-1/
doi:https://doi.org/10.1016/j.elerap.2019.100917
Close
Hosseini, Hawre; Nguyen, Tam T; Wu, Jimmy; Bagheri, Ebrahim
Implicit Entity Linking in Tweets: an Ad-hoc Retrieval Approach Journal Article
In: Applied Ontology, 2019.
Abstract | Links | BibTeX | Tags:
@article{ao2019,
  title     = {Implicit Entity Linking in Tweets: an Ad-hoc Retrieval Approach},
  author    = {Hawre Hosseini and Tam T Nguyen and Jimmy Wu and Ebrahim Bagheri},
  url       = {https://www.iospress.nl/journal/applied-ontology/},
  doi       = {10.3233/AO-190215},
  year      = {2019},
  date      = {2019-01-01},
  urldate   = {2019-01-01},
  journal   = {Applied Ontology},
  abstract  = {Within the context of Twitter analytics, the notion of implicit entity linking has recently been introduced to refer to the identification of a named entity, which is central to the topic of the tweet, but whose surface form is not present in the tweet itself. Compared to traditional forms of entity linking where the linking process revolves around an identified surface form of a potential entity, implicit entity linking relies on contextual clues to determine whether an implicit entity is present within a given tweet and if so, which entity is being referenced. The objective of this paper, while introducing and publicly sharing a comprehensive gold standard dataset for implicit entity linking, is to perform the task of implicit entity linking. The dataset consists of 7,870 tweets, which are classified as either containing implicit entities, explicit entities, both, or neither. The implicit entities are then linked to three levels of entities on Wikipedia, namely coarse-grained level, e.g., Person, Fine-grained level, e.g., Comedian, and the actual entity, e.g., Seinfeld. The proposed model in this work formulates the problem of implicit entity linking as an ad-hoc document retrieval process where the input query is the tweet, which needs to be implicitly linked and the document space is the set of textual descriptions of entities in the knowledge base. The novel contributions of our work include: 1) designing and collecting a gold standard dataset for the task of implicit entity linking; 2) defining the implicit entity linking process as an ad-hoc document retrieval task; and 3) proposing a neural embedding-based feature function that is interpolated with prior term dependency and entity-based feature functions to enhance implicit entity linking. We systematically compare our work with a state of the art baseline and show that our method is able to provide statistically significant improvements.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Within the context of Twitter analytics, the notion of implicit entity linking has recently been introduced to refer to the identification of a named entity, which is central to the topic of the tweet, but whose surface form is not present in the tweet itself. Compared to traditional forms of entity linking where the linking process revolves around an identified surface form of a potential entity, implicit entity linking relies on contextual clues to determine whether an implicit entity is present within a given tweet and if so, which entity is being referenced. The objective of this paper, while introducing and publicly sharing a comprehensive gold standard dataset for implicit entity linking, is to perform the task of implicit entity linking. The dataset consists of 7,870 tweets, which are classified as either containing implicit entities, explicit entities, both, or neither. The implicit entities are then linked to three levels of entities on Wikipedia, namely coarse-grained level, e.g., Person, Fine-grained level, e.g., Comedian, and the actual entity, e.g., Seinfeld. The proposed model in this work formulates the problem of implicit entity linking as an ad-hoc document retrieval process where the input query is the tweet, which needs to be implicitly linked and the document space is the set of textual descriptions of entities in the knowledge base. The novel contributions of our work include: 1) designing and collecting a gold standard dataset for the task of implicit entity linking; 2) defining the implicit entity linking process as an ad-hoc document retrieval task; and 3) proposing a neural embedding-based feature function that is interpolated with prior term dependency and entity-based feature functions to enhance implicit entity linking. We systematically compare our work with a state of the art baseline and show that our method is able to provide statistically significant improvements.
Close
https://www.iospress.nl/journal/applied-ontology/
doi:https://doi.org/10.3233/AO-190215
Close
Pourmasoumi, Asef; Kahani, Mohsen; Bagheri, Ebrahim
The Evolutionary Composition of Desirable Execution Traces from Event Logs Journal Article
In: Future Generation Computer Systems, vol. 98, pp. 78-103, 2019.
Abstract | Links | BibTeX | Tags:
@article{fgcs2019,
  title     = {The Evolutionary Composition of Desirable Execution Traces from Event Logs},
  author    = {Asef Pourmasoumi and Mohsen Kahani and Ebrahim Bagheri},
  url       = {http://ls3.rnet.ryerson.ca/wp-content/uploads/2019/03/fgcs.pdf},
  doi       = {10.1016/j.future.2019.03.037},
  year      = {2019},
  date      = {2019-01-01},
  urldate   = {2019-01-01},
  journal   = {Future Generation Computer Systems},
  volume    = {98},
  pages     = {78--103},
  abstract  = {In this paper, we propose an evolutionary computing approach based on Genetic Algorithms for composing an efficient trace given a desirable utility function based on the observations made in the event logs of several peer-organizations. Our proposed approach works with a set of event logs from different peer-organizations and generates an efficient trace according to a utility function. The main advantage of our approach is that we primarily work with event logs that are more accurate representations of the actual execution of a process within an organization. Furthermore, we generate efficient traces that are put together through the identification of sub-parts of the observed traces that are locally optimal. We report on our experiments on the BPIC'15 dataset that show improvement in terms of the optimality of the generated traces.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
In this paper, we propose an evolutionary computing approach based on Genetic Algorithms for composing an efficient trace given a desirable utility function based on the observations made in the event logs of several peer-organizations. Our proposed approach works with a set of event logs from different peer-organizations and generates an efficient trace according to a utility function. The main advantage of our approach is that we primarily work with event logs that are more accurate representations of the actual execution of a process within an organization. Furthermore, we generate efficient traces that are put together through the identification of sub-parts of the observed traces that are locally optimal. We report on our experiments on the BPIC'15 dataset that show improvement in terms of the optimality of the generated traces.
Close
http://ls3.rnet.ryerson.ca/wp-content/uploads/2019/03/fgcs.pdf
doi:https://doi.org/10.1016/j.future.2019.03.037
Close
Falavarjani, Seyed Amin Mirlohi; Zarrinkalam, Fattane; Jovanovic, Jelena; Bagheri, Ebrahim; Ghorbani, Ali A
The Reflection of Offline Activities on Users’ Online Social Behavior: An Observational Study Journal Article
In: Information Processing and Management, 2019.
Abstract | Links | BibTeX | Tags:
@article{ipm2019e,
  title     = {The Reflection of Offline Activities on Users’ Online Social Behavior: An Observational Study},
  author    = {Seyed Amin Mirlohi Falavarjani and Fattane Zarrinkalam and Jelena Jovanovic and Ebrahim Bagheri and Ali A Ghorbani},
  url       = {https://ls3.rnet.torontomu.ca/1-s2-0-s0306457318309981-main_compressed/},
  doi       = {10.1016/j.ipm.2019.102070},
  year      = {2019},
  date      = {2019-01-01},
  urldate   = {2019-01-01},
  journal   = {Information Processing and Management},
  abstract  = {The ever increasing presence of online social networks in users’ daily lives has led to the interplay between users’ online and offline activities. There have already been several works that have studied the impact of users’ online activities on their offline behavior, e.g., the impact of interaction with friends on an exercise social network on the number of daily steps. In this paper, we consider the inverse to what has already been studied and report on our extensive study that explores the potential causal effects of users’ offline activities on their online social behavior. The objective of our work is to understand whether the activities that users are involved with in their real daily life, which place them within or away from social situations, have any direct causal impact on their behavior in online social networks. Our work is motivated by the theory of normative social influence, which argues that individuals may show behaviors or express opinions that conform to those of the community for the sake of being accepted or from fear of rejection or isolation. We have collected data from two online social networks, namely Twitter and Foursquare, and systematically aligned user content on both social networks. On this basis, we have performed a natural experiment that took the form of an interrupted time series with a comparison group design to study whether users’ socially situated offline activities exhibited through their Foursquare check-ins impact their online behavior captured through the content they share on Twitter.
Our main findings can be summarised as follows (1) a change in users’ offline behaviour that affects the level of users’ exposure to social situations, e.g., starting to go to the gym or discontinuing frequenting at bars, can have a causal impact on users’ online topical interests and sentiment; and (2) the causal relations between users’ socially situated offline activities and their online social behavior can be used to build effective predictive models of users’ online topical interests and sentiments.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
The ever increasing presence of online social networks in users’ daily lives has led to the interplay between users’ online and offline activities. There have already been several works that have studied the impact of users’ online activities on their offline behavior, e.g., the impact of interaction with friends on an exercise social network on the number of daily steps. In this paper, we consider the inverse to what has already been studied and report on our extensive study that explores the potential causal effects of users’ offline activities on their online social behavior. The objective of our work is to understand whether the activities that users are involved with in their real daily life, which place them within or away from social situations, have any direct causal impact on their behavior in online social networks. Our work is motivated by the theory of normative social influence, which argues that individuals may show behaviors or express opinions that conform to those of the community for the sake of being accepted or from fear of rejection or isolation. We have collected data from two online social networks, namely Twitter and Foursquare, and systematically aligned user content on both social networks. On this basis, we have performed a natural experiment that took the form of an interrupted time series with a comparison group design to study whether users’ socially situated offline activities exhibited through their Foursquare check-ins impact their online behavior captured through the content they share on Twitter. 
Our main findings can be summarised as follows (1) a change in users’ offline behaviour that affects the level of users’ exposure to social situations, e.g., starting to go to the gym or discontinuing frequenting at bars, can have a causal impact on users’ online topical interests and sentiment; and (2) the causal relations between users’ socially situated offline activities and their online social behavior can be used to build effective predictive models of users’ online topical interests and sentiments.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0306457318309981-main_compressed/
doi:https://doi.org/10.1016/j.ipm.2019.102070
Close
Fani, Hossein; Jiang, Eric; Bagheri, Ebrahim; Al-Obeidat, Feras; Du, Weichang; Kargar, Mehdi
User Community Detection via Embedding of Social Network Structure and Temporal Content Journal Article
In: Information Processing and Management, 2019.
Abstract | Links | BibTeX | Tags:
@article{ipm2019d,
  title     = {User Community Detection via Embedding of Social Network Structure and Temporal Content},
  author    = {Hossein Fani and Eric Jiang and Ebrahim Bagheri and Feras Al-Obeidat and Weichang Du and Mehdi Kargar},
  url       = {https://ls3.rnet.torontomu.ca/1-s2-0-s030645731830997x-main_compressed/},
  doi       = {10.1016/j.ipm.2019.102056},
  year      = {2019},
  date      = {2019-01-01},
  urldate   = {2019-01-01},
  journal   = {Information Processing and Management},
  abstract  = {Identifying and extracting user communities is an important step towards understanding social network dynamics from a macro perspective. For this reason, the work in this paper explores various aspects related to the identification of user communities. To date, user community detection methods employ either explicit links between users (link analysis), or users' topics of interest in posted content (content analysis), or in tandem. Little work has considered temporal evolution when identifying user communities in a way to group together those users who share not only similar topical interests but also similar temporal behavior towards their topics of interest. In this paper, we identify user communities through multimodal feature learning (embeddings). Our core contributions can be enumerated as (a) we propose a new method for learning neural embeddings for users based on their temporal content similarity; (b) we learn user embeddings based on their social network connections (links) through neural graph embeddings; (c) we systematically interpolate temporal content-based embeddings and social link-based embeddings to capture both social network connections and temporal content evolution for representing users, and (d) we systematically evaluate the quality of each embedding type in isolation and also when interpolated together and demonstrate their performance on a Twitter dataset under two different application scenarios, namely news recommendation and user prediction.
We find that (1) content-based methods produce higher quality communities compared to link-based methods; (2) methods that consider temporal evolution of content, our proposed method in particular, show better performance compared to their non-temporal counter-parts; (3) communities that are produced when time is explicitly incorporated in user vector representations have higher quality than the ones produced when time is incorporated into a generative process, and finally (4) while link-based methods are weaker than content-based methods, their interpolation with content-based methods leads to improved quality of the identified communities.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Identifying and extracting user communities is an important step towards understanding social network dynamics from a macro perspective. For this reason, the work in this paper explores various aspects related to the identification of user communities. To date, user community detection methods employ either explicit links between users (link analysis), or users' topics of interest in posted content (content analysis), or in tandem. Little work has considered temporal evolution when identifying user communities in a way to group together those users who share not only similar topical interests but also similar temporal behavior towards their topics of interest. In this paper, we identify user communities through multimodal feature learning (embeddings). Our core contributions can be enumerated as (a) we propose a new method for learning neural embeddings for users based on their temporal content similarity; (b) we learn user embeddings based on their social network connections (links) through neural graph embeddings; (c) we systematically interpolate temporal content-based embeddings and social link-based embeddings to capture both social network connections and temporal content evolution for representing users, and (d) we systematically evaluate the quality of each embedding type in isolation and also when interpolated together and demonstrate their performance on a Twitter dataset under two different application scenarios, namely news recommendation and user prediction.
We find that (1) content-based methods produce higher quality communities compared to link-based methods; (2) methods that consider temporal evolution of content, our proposed method in particular, show better performance compared to their non-temporal counter-parts; (3) communities that are produced when time is explicitly incorporated in user vector representations have higher quality than the ones produced when time is incorporated into a generative process, and finally (4) while link-based methods are weaker than content-based methods, their interpolation with content-based methods leads to improved quality of the identified communities.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s030645731830997x-main_compressed/
doi:https://doi.org/10.1016/j.ipm.2019.102056
Close
Vo, Duc-Thuan; Bagheri, Ebrahim
Extracting Temporal Event Relations based on Event Networks Proceedings Article
In: 41st European Conference on Information Retrieval (ECIR 2019), 2019.
Links | BibTeX | Tags:
@inproceedings{ecir19-1,
  title       = {Extracting Temporal Event Relations based on Event Networks},
  author      = {Duc-Thuan Vo and Ebrahim Bagheri},
  url         = {https://ls3.rnet.torontomu.ca/wp-content/uploads/2024/04/978-3-030-15712-8-853-859.pdf},
  year        = {2019},
  date        = {2019-01-01},
  urldate     = {2019-01-01},
  booktitle   = {41st European Conference on Information Retrieval (ECIR 2019)},
  review-note = {The DOI previously recorded here (10.3233/AO-190215) is the DOI of the Applied Ontology article in entry ao2019, not of this ECIR paper, so it was removed. TODO: add the correct DOI for this proceedings chapter once confirmed with the publisher record.},
  keywords    = {},
  pubstate    = {published},
  tppubtype   = {inproceedings}
}

Close
https://ls3.rnet.torontomu.ca/wp-content/uploads/2024/04/978-3-030-15712-8-853-8[...]
doi:https://doi.org/10.3233/AO-190215
Close
Arabzadeh, Negar; Zarrinkalam, Fattane; Jovanovic, Jelena; Bagheri, Ebrahim
Geometric Estimation of Specificity within Embedding Spaces Proceedings Article
In: The 28th ACM International Conference on Information and Knowledge Management (CIKM 2019), 2019.
Abstract | Links | BibTeX | Tags:
@inproceedings{cikm19,
  title     = {Geometric Estimation of Specificity within Embedding Spaces},
  author    = {Negar Arabzadeh and Fattane Zarrinkalam and Jelena Jovanovic and Ebrahim Bagheri},
  url       = {https://ls3.rnet.torontomu.ca/3357384-3358152/},
  doi       = {10.1145/3357384.3358152},
  year      = {2019},
  date      = {2019-01-01},
  urldate   = {2019-01-01},
  booktitle = {The 28th ACM International Conference on Information and Knowledge Management (CIKM 2019)},
  abstract  = {Specificity is the level of detail at which a given term is represented. Existing approaches to estimating term specificity are primarily dependent on corpus-level frequency statistics. In this work, we explore how neural embeddings can be used to define corpus-independent specificity metrics. Particularly, we propose to measure term specificity based on the distribution of terms in the neighborhood of the given term in the embedding space. The intuition is that a term that is surrounded by other terms in the embedding space is more likely to be specific while a term surrounded by less closely related terms is more likely to be generic. On this basis, we leverage geometric properties between embedded terms to define three groups of metrics: (1) neighborhood-based, (2) graph-based and (3) cluster-based metrics. Moreover, we employ learning-to-rank techniques to estimate term specificity in a supervised approach by employing the three proposed groups of metrics. We curate and publicly share a test collection of term specificity measurements defined based on Wikipedia's category hierarchy. We report on our experiments through metric performance comparison, ablation study and comparison against the state-of-the-art baselines.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Specificity is the level of detail at which a given term is represented. Existing approaches to estimating term specificity are primarily dependent on corpus-level frequency statistics. In this work, we explore how neural embeddings can be used to define corpus-independent specificity metrics. Particularly, we propose to measure term specificity based on the distribution of terms in the neighborhood of the given term in the embedding space. The intuition is that a term that is surrounded by other terms in the embedding space is more likely to be specific while a term surrounded by less closely related terms is more likely to be generic. On this basis, we leverage geometric properties between embedded terms to define three groups of metrics: (1) neighborhood-based, (2) graph-based and (3) cluster-based metrics. Moreover, we employ learning-to-rank techniques to estimate term specificity in a supervised approach by employing the three proposed groups of metrics. We curate and publicly share a test collection of term specificity measurements defined based on Wikipedia's category hierarchy. We report on our experiments through metric performance comparison, ablation study and comparison against the state-of-the-art baselines.

Close
https://ls3.rnet.torontomu.ca/3357384-3358152/
doi:https://doi.org/10.1145/3357384.3358152
Close
Pourali, Alireza; Zarrinkalam, Fattane; Bagheri, Ebrahim
Neural Embedding Features for Point-of-Interest Recommendation Proceedings Article
In: IEEE/ACM International Conference on Social Networks Analysis and Mining (ASONAM 2019), 2019.
Abstract | Links | BibTeX | Tags:
@inproceedings{asonam19b,
  author    = {Pourali, Alireza and Zarrinkalam, Fattane and Bagheri, Ebrahim},
  title     = {Neural Embedding Features for Point-of-Interest Recommendation},
  booktitle = {IEEE/ACM International Conference on Social Networks Analysis and Mining (ASONAM 2019)},
  year      = {2019},
  date      = {2019-01-01},
  doi       = {10.1145/3341161.3343672},
  url       = {https://ls3.rnet.torontomu.ca/3341161-3343672/},
  urldate   = {2019-01-01},
  abstract  = {The focus of point-of-interest recommendation techniques is to suggest a venue to a given user that would match the users' interests and is likely to be adopted by the user. Given the multitude of venues and the sparsity of user check-ins, the problem of recommending venues has shown to be a difficult task. Existing literature has already explored various types of features such as geographical distribution, social structure and temporal behavioral patterns to make a recommendation. In this paper, we propose a new set of features derived based on the neural embeddings of venues and users. We show how the neural embeddings for users and venues can be jointly learnt based on the prior check-in sequence of users and then be used to define three types of features, namely user, venue, and user-venue interaction features. These features are integrated into a feature-based matrix factorization model. Our experiments show that the features defined over the user and venue embeddings are effective for venue recommendation.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
The focus of point-of-interest recommendation techniques is to suggest a venue to a given user that would match the users' interests and is likely to be adopted by the user. Given the multitude of venues and the sparsity of user check-ins, the problem of recommending venues has shown to be a difficult task. Existing literature has already explored various types of features such as geographical distribution, social structure and temporal behavioral patterns to make a recommendation. In this paper, we propose a new set of features derived based on the neural embeddings of venues and users. We show how the neural embeddings for users and venues can be jointly learnt based on the prior check-in sequence of users and then be used to define three types of features, namely user, venue, and user-venue interaction features. These features are integrated into a feature-based matrix factorization model. Our experiments show that the features defined over the user and venue embeddings are effective for venue recommendation.

Close
https://ls3.rnet.torontomu.ca/3341161-3343672/
doi:10.1145/3341161.3343672
Close
Falavarjani, Seyed Amin Mirlohi; Bagheri, Ebrahim; Chou, Ssu Yu Zoe; Jovanovic, Jelena; Ghorbani, Ali A
On the Causal Relation between Users' Real-World Activities and their Affective Processes Proceedings Article
In: IEEE/ACM International Conference on Social Networks Analysis and Mining (ASONAM 2019), 2019.
Abstract | Links | BibTeX | Tags:
@inproceedings{asonam19,
  author    = {Falavarjani, Seyed Amin Mirlohi and Bagheri, Ebrahim and Chou, Ssu Yu Zoe and Jovanovic, Jelena and Ghorbani, Ali A},
  title     = {On the Causal Relation between Users' Real-World Activities and their Affective Processes},
  booktitle = {IEEE/ACM International Conference on Social Networks Analysis and Mining (ASONAM 2019)},
  year      = {2019},
  date      = {2019-01-01},
  doi       = {10.1145/3341161.3342918},
  url       = {https://ls3.rnet.torontomu.ca/3341161-3342918/},
  urldate   = {2019-01-01},
  abstract  = {Research in social network analytics has already extensively explored how engagement on online social networks can lead to observable effects on users' real-world behavior (e.g., changing exercising patterns or dietary habits), and their psychological states. The objective of our work in this paper is to investigate the flip-side and examine whether engaging in or disengaging from real-world activities would reflect itself in users' affective processes such as anger, anxiety, and sadness, as expressed in users' posts on online social media. We have collected data from Foursquare and Twitter and found that engaging in or disengaging from a real-world activity, such as frequenting at bars or stopping going to a gym, have direct impact on the users' affective processes. In particular, we report that engaging in a routine real-world activity leads to expressing less emotional content online, whereas the reverse is observed when users abandon a regular real-world activity.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Research in social network analytics has already extensively explored how engagement on online social networks can lead to observable effects on users' real-world behavior (e.g., changing exercising patterns or dietary habits), and their psychological states. The objective of our work in this paper is to investigate the flip-side and examine whether engaging in or disengaging from real-world activities would reflect itself in users' affective processes such as anger, anxiety, and sadness, as expressed in users' posts on online social media. We have collected data from Foursquare and Twitter and found that engaging in or disengaging from a real-world activity, such as frequenting at bars or stopping going to a gym, have direct impact on the users' affective processes. In particular, we report that engaging in a routine real-world activity leads to expressing less emotional content online, whereas the reverse is observed when users abandon a regular real-world activity.

Close
https://ls3.rnet.torontomu.ca/3341161-3342918/
doi:10.1145/3341161.3342918
Close
Fard, Amin Milani; Bagheri, Ebrahim; Wang, Ke
Relationship Prediction in Dynamic Heterogeneous Information Networks Proceedings Article
In: 41st European Conference on Information Retrieval (ECIR 2019), 2019.
Links | BibTeX | Tags:
@inproceedings{ecir19-2,
  author    = {Fard, Amin Milani and Bagheri, Ebrahim and Wang, Ke},
  title     = {Relationship Prediction in Dynamic Heterogeneous Information Networks},
  booktitle = {41st European Conference on Information Retrieval (ECIR 2019)},
  year      = {2019},
  date      = {2019-01-01},
  doi       = {10.1007/978-3-030-15712-8_2},
  url       = {https://ls3.rnet.torontomu.ca/paper/},
  urldate   = {2019-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
https://ls3.rnet.torontomu.ca/paper/
doi:10.1007/978-3-030-15712-8_2
Close
2018
Bashari, Mahdi; Bagheri, Ebrahim; Du, Weichang
Automated Composition and Optimization of Services for Variability-Intensive Domains Journal Article
In: Journal of Systems and Software, 2018.
Abstract | Links | BibTeX | Tags:
@article{jss2018-2,
  author    = {Bashari, Mahdi and Bagheri, Ebrahim and Du, Weichang},
  title     = {Automated Composition and Optimization of Services for Variability-Intensive Domains},
  journal   = {Journal of Systems and Software},
  year      = {2018},
  date      = {2018-01-01},
  doi       = {10.1016/j.jss.2018.07.039},
  url       = {https://ls3.rnet.torontomu.ca/1-s2-0-s0164121218301481-main_compressed/},
  urldate   = {2018-01-01},
  abstract  = {The growth in the number of publicly available services on the Web has encouraged developers to rely more heavily on such services to deliver products in a faster, cheaper and more reliable fashion. Many developers are now using a collection of these services in tandem to build their applications. While there has been much attention to the area of service composition, there are few works that examine the possibility of automatically generating service compositions for variability-intensive application domains. High variability in a domain is often captured through an organized feature space, which has the potential for developing many different application instantiations. The focus of our work is to develop an end-to-end technique that would enable the automatic generation of composite services based on a specific configuration of the feature space that would be directly executable and presented in WS-BPEL format. To this end, we adopt concepts from software product line engineering and AI planning to deliver the automated composition of online services. We will further benefit from such notions as safeness and threat from AI planning to optimize the generated service compositions by introducing parallelism where possible. Furthermore, we show how the specification of the generated service composition can be translated into executable WS-BPEL code. More specifically, the core contributions of our work are: (1) we show how AI planning techniques can be used to generate a workflow based on a feature model configuration; (2) we propose a method for optimizing a workflow generated based on AI planning techniques; and (3) we demonstrate that the optimized workflow can be directly translated into WS-BPEL code.
We evaluate our work from two perspectives: i) we will first formally prove that the methods that we have proposed are sound and complete from a theoretical perspective, and ii) we will show through experimentation that our proposed work is usable from a practical point of view.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
The growth in the number of publicly available services on the Web has encouraged developers to rely more heavily on such services to deliver products in a faster, cheaper and more reliable fashion. Many developers are now using a collection of these services in tandem to build their applications. While there has been much attention to the area of service composition, there are few works that examine the possibility of automatically generating service compositions for variability-intensive application domains. High variability in a domain is often captured through an organized feature space, which has the potential for developing many different application instantiations. The focus of our work is to develop an end-to-end technique that would enable the automatic generation of composite services based on a specific configuration of the feature space that would be directly executable and presented in WS-BPEL format. To this end, we adopt concepts from software product line engineering and AI planning to deliver the automated composition of online services. We will further benefit from such notions as safeness and threat from AI planning to optimize the generated service compositions by introducing parallelism where possible. Furthermore, we show how the specification of the generated service composition can be translated into executable WS-BPEL code. More specifically, the core contributions of our work are: (1) we show how AI planning techniques can be used to generate a workflow based on a feature model configuration; (2) we propose a method for optimizing a workflow generated based on AI planning techniques; and (3) we demonstrate that the optimized workflow can be directly translated into WS-BPEL code. 
We evaluate our work from two perspectives: i) we will first formally prove that the methods that we have proposed are sound and complete from a theoretical perspective, and ii) we will show through experimentation that our proposed work is usable from a practical point of view.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0164121218301481-main_compressed/
doi:10.1016/j.jss.2018.07.039
Close
Biletskiy, Yevgen; Brown, Anthony J; Ranganathan, Girish R; Bagheri, Ebrahim; Akbari, Ismail
Building a business domain meta-ontology for information pre-processing Journal Article
In: Information Processing Letters, 2018.
Abstract | Links | BibTeX | Tags:
@article{ipl2018,
  author    = {Biletskiy, Yevgen and Brown, Anthony J and Ranganathan, Girish R and Bagheri, Ebrahim and Akbari, Ismail},
  title     = {Building a business domain meta-ontology for information pre-processing},
  journal   = {Information Processing Letters},
  year      = {2018},
  date      = {2018-01-01},
  doi       = {10.1016/j.ipl.2018.06.009},
  url       = {https://ls3.rnet.torontomu.ca/1-s2-0-s0020019018301340-main/},
  urldate   = {2018-01-01},
  abstract  = {Business analysts, along with other business domain software application users, have created a vast amount of business documents, which often do not have any business domain ontologies in the background. This situation leads to misinterpretation of such documents, when being processed by machines, that results in inhibiting the productiveness of computer-assisted analytical work and effectiveness of business solutions due to lack of effective semantics; therefore, business analysts (especially, if rotating) can use well-designed business domain ontologies as a backbone for their official applications. The process of extracting and capturing domain ontologies from these voluminous documents requires extensive involvement of domain experts and application of methods of ontology learning that is substantially labor intensive; therefore, some intermediate solutions which would assist in capturing business domain ontologies must be developed. The present paper proposes a solution in this direction which involves building a meta-ontology as a rapid approach in conceptualizing a business domain from huge amounts of source documents. This meta-ontology can be populated by ontological concepts, attributes and relations from business documents, and then refined in order to form better business domain ontology either through automatic ontology learning methods or some other traditional ontology building approaches.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Business analysts, along with other business domain software application users, have created a vast amount of business documents, which often do not have any business domain ontologies in the background. This situation leads to misinterpretation of such documents, when being processed by machines, that results in inhibiting the productiveness of computer-assisted analytical work and effectiveness of business solutions due to lack of effective semantics; therefore, business analysts (especially, if rotating) can use well-designed business domain ontologies as a backbone for their official applications. The process of extracting and capturing domain ontologies from these voluminous documents requires extensive involvement of domain experts and application of methods of ontology learning that is substantially labor intensive; therefore, some intermediate solutions which would assist in capturing business domain ontologies must be developed. The present paper proposes a solution in this direction which involves building a meta-ontology as a rapid approach in conceptualizing a business domain from huge amounts of source documents. This meta-ontology can be populated by ontological concepts, attributes and relations from business documents, and then refined in order to form better business domain ontology either through automatic ontology learning methods or some other traditional ontology building approaches.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0020019018301340-main/
doi:10.1016/j.ipl.2018.06.009
Close
Khodabakhsh, Maryam; Kahani, Mohsen; Bagheri, Ebrahim; Noorian, Zeinab
Detecting Life Events From Twitter based on Temporal Semantic Features Journal Article
In: Knowledge-based Systems, 2018.
Abstract | Links | BibTeX | Tags:
@article{knosys18,
  author    = {Khodabakhsh, Maryam and Kahani, Mohsen and Bagheri, Ebrahim and Noorian, Zeinab},
  title     = {Detecting Life Events From {Twitter} based on Temporal Semantic Features},
  journal   = {Knowledge-based Systems},
  year      = {2018},
  date      = {2018-01-01},
  doi       = {10.1016/j.knosys.2018.02.021},
  url       = {https://ls3.rnet.torontomu.ca/1-s2-0-s095070511830073x-main_compressed/},
  urldate   = {2018-01-01},
  abstract  = {The wide adoption of social networking and microblogging platforms by a large number of users across the globe has provided a rich source of unstructured information for understanding users' behaviors, interests and opinions at both micro and macro levels. An active area in this space is the detection of important real-world events from user-generated social content. The works in this area identify instances of events that impact a large number of users. However, a more nuanced form of an event, known as life event, is also of high importance, which in contrast to real-world events, does not impact a large number of users and is limited to at most a few people. For this reason, life events, such as marriage, travel, and career change, among others, are more difficult to detect for several reasons: i) they are specific to a given user and do not have a wider reaching reflection; ii) they are often not reported directly and need to be inferred from the content posted by individual users; and iii) many users do not report their life events on social platforms, making the problem highly class-imbalanced. In this paper, we propose a semantic approach based on word embedding techniques to model life events. We then use word mover's distance to measure the similarity of a given tweet to different types of life events, which are used as input features for a multi-class classifier. Furthermore, we show that when a sequence of tweets that have appeared before and after a given tweet of interest (temporal stacking) are considered, the performance of the life event detection task improves significantly.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
The wide adoption of social networking and microblogging platforms by a large number of users across the globe has provided a rich source of unstructured information for understanding users' behaviors, interests and opinions at both micro and macro levels. An active area in this space is the detection of important real-world events from user-generated social content. The works in this area identify instances of events that impact a large number of users. However, a more nuanced form of an event, known as life event, is also of high importance, which in contrast to real-world events, does not impact a large number of users and is limited to at most a few people. For this reason, life events, such as marriage, travel, and career change, among others, are more difficult to detect for several reasons: i) they are specific to a given user and do not have a wider reaching reflection; ii) they are often not reported directly and need to be inferred from the content posted by individual users; and iii) many users do not report their life events on social platforms, making the problem highly class-imbalanced. In this paper, we propose a semantic approach based on word embedding techniques to model life events. We then use word mover's distance to measure the similarity of a given tweet to different types of life events, which are used as input features for a multi-class classifier. Furthermore, we show that when a sequence of tweets that have appeared before and after a given tweet of interest (temporal stacking) are considered, the performance of the life event detection task improves significantly.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s095070511830073x-main_compressed/
doi:10.1016/j.knosys.2018.02.021
Close
Feng, Yue; Zarrinkalam, Fattane; Bagheri, Ebrahim; Fani, Hossein; Al-Obeidat, Feras
Entity Linking of Tweets based on Dominant Entity Candidates Journal Article
In: Social Network Analysis and Mining, 2018.
Abstract | Links | BibTeX | Tags:
@article{snam2018,
  author    = {Feng, Yue and Zarrinkalam, Fattane and Bagheri, Ebrahim and Fani, Hossein and Al-Obeidat, Feras},
  title     = {Entity Linking of Tweets based on Dominant Entity Candidates},
  journal   = {Social Network Analysis and Mining},
  year      = {2018},
  date      = {2018-01-01},
  doi       = {10.1007/s13278-018-0523-0},
  url       = {https://ls3.rnet.torontomu.ca/s13278-018-0523-0/},
  urldate   = {2018-01-01},
  abstract  = {Entity linking, also known as semantic annotation, of textual content has received increasing attention. Recent works in this area have focused on entity linking on text with special characteristics such as search queries and tweets. The semantic annotation of tweets is specially proven to be challenging given the informal nature of the writing and the short length of the text. In this paper, we propose a method to perform entity linking on tweets built based on one primary hypothesis. We hypothesize that while there are formally many possible entity candidates for an ambiguous mention in a tweet, as listed on the disambiguation page of the corresponding entity on Wikipedia, there are only few entity candidates that are likely to be employed in the context of Twitter. Based on this hypothesis, we propose a method to identify such dominant entity candidates for each ambiguous mention and use them in the annotation process. Particularly, our proposed work integrates two phases i) dominant entity candidate detection, which applies community detection methods for finding the dominant candidates of ambiguous mentions; and ii) named entity disambiguation that links a tweet to entities in Wikipedia by only considering the identified dominant entity candidates. Our investigations show that: 1) there are only very few entity candidates for each ambiguous mention in a tweet that need to be considered when performing disambiguation. This helps us limit the candidate search space and hence noticeably reduce the entity linking time; 2) limiting the search space to only a subset of disambiguation options will not only improve entity linking execution time but will also lead to improved accuracy of the entity linking process when the main entity candidates of each mention are mined from a temporally aligned corpus.
We show that our proposed method offers competitive results with the state-of-the-art methods in terms of precision and recall on widely-used gold standard datasets while significantly reducing the time for processing each tweet.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Entity linking, also known as semantic annotation, of textual content has received increasing attention. Recent works in this area have focused on entity linking on text with special characteristics such as search queries and tweets. The semantic annotation of tweets is specially proven to be challenging given the informal nature of the writing and the short length of the text. In this paper, we propose a method to perform entity linking on tweets built based on one primary hypothesis. We hypothesize that while there are formally many possible entity candidates for an ambiguous mention in a tweet, as listed on the disambiguation page of the corresponding entity on Wikipedia, there are only few entity candidates that are likely to be employed in the context of Twitter. Based on this hypothesis, we propose a method to identify such dominant entity candidates for each ambiguous mention and use them in the annotation process. Particularly, our proposed work integrates two phases i) dominant entity candidate detection, which applies community detection methods for finding the dominant candidates of ambiguous mentions; and ii) named entity disambiguation that links a tweet to entities in Wikipedia by only considering the identified dominant entity candidates. Our investigations show that: 1) there are only very few entity candidates for each ambiguous mention in a tweet that need to be considered when performing disambiguation. This helps us limit the candidate search space and hence noticeably reduce the entity linking time; 2) limiting the search space to only a subset of disambiguation options will not only improve entity linking execution time but will also lead to improved accuracy of the entity linking process when the main entity candidates of each mention are mined from a temporally aligned corpus. 
We show that our proposed method offers competitive results with the state-of-the-art methods in terms of precision and recall on widely-used gold standard datasets while significantly reducing the time for processing each tweet.
Close
https://ls3.rnet.torontomu.ca/s13278-018-0523-0/
doi:10.1007/s13278-018-0523-0
Close
Vo, Duc-Thuan; Bagheri, Ebrahim
Feature-enriched Matrix Factorization for Relation Extraction Journal Article
In: Information Processing and Management, 2018.
Abstract | Links | BibTeX | Tags:
@article{ipm2018-thuan,
  author    = {Vo, Duc-Thuan and Bagheri, Ebrahim},
  title     = {Feature-enriched Matrix Factorization for Relation Extraction},
  journal   = {Information Processing and Management},
  year      = {2018},
  date      = {2018-01-01},
  doi       = {10.1016/j.ipm.2018.10.011},
  url       = {https://ls3.rnet.torontomu.ca/1-s2-0-s0306457318303157-main_compressed/},
  urldate   = {2018-01-01},
  abstract  = {Relation extraction aims at finding meaningful relationships between two named entities from within unstructured textual content. In this paper, we define the problem of information extraction as a matrix completion problem where we employ the notion of universal schemas formed as a collection of patterns derived from open information extraction systems as well as additional features derived from grammatical clause patterns and statistical topic models. One of the challenges with earlier work that employ matrix completion methods is that such approaches require a sufficient number of observed relation instances to be able to make predictions. However, in practice there is often insufficient number of explicit evidence supporting each relation type that could be used within the matrix model. Hence, existing work suffer from a low recall. In our work, we extend the work in the state of the art by proposing novel ways of integrating two sets of features, i.e., topic models and grammatical clause structures, for alleviating the low recall problem. More specifically, we propose that it is possible to (1) employ grammatical clause information from textual sentences to serve as an implicit indication of relation type and argument similarity. The basis for this is that it is likely that similar relation types and arguments are observed within similar grammatical structures, and (2) benefit from statistical topic models to determine similarity between relation types and arguments. We employ statistical topic models to determine relation type and argument similarity based on their co-occurrence within the same topics. We have performed extensive experiments based on both gold standard and silver standard datasets. The experiments show that our approach has been able to address the low recall problem in existing methods, by showing an improvement of 21\% on recall and 8\% on f-measure over the state of the art baseline.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Relation extraction aims at finding meaningful relationships between two named entities from within unstructured textual content. In this paper, we define the problem of information extraction as a matrix completion problem where we employ the notion of universal schemas formed as a collection of patterns derived from open information extraction systems as well as additional features derived from grammatical clause patterns and statistical topic models. One of the challenges with earlier work that employ matrix completion methods is that such approaches require a sufficient number of observed relation instances to be able to make predictions. However, in practice there is often insufficient number of explicit evidence supporting each relation type that could be used within the matrix model. Hence, existing work suffer from a low recall. In our work, we extend the work in the state of the art by proposing novel ways of integrating two sets of features, i.e., topic models and grammatical clause structures, for alleviating the low recall problem. More specifically, we propose that it is possible to (1) employ grammatical clause information from textual sentences to serve as an implicit indication of relation type and argument similarity. The basis for this is that it is likely that similar relation types and arguments are observed within similar grammatical structures, and (2) benefit from statistical topic models to determine similarity between relation types and arguments. We employ statistical topic models to determine relation type and argument similarity based on their co-occurrence within the same topics. We have performed extensive experiments based on both gold standard and silver standard datasets. The experiments show that our approach has been able to address the low recall problem in existing methods, by showing an improvement of 21% on recall and 8% on f-measure over the state of the art baseline.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0306457318303157-main_compressed/
doi:10.1016/j.ipm.2018.10.011
Close
Fani, Hossein; Bagheri, Ebrahim; Zarrinkalam, Fattane; Zhao, Xin; Du, Weichang
Finding Diachronic Like-Minded Users Journal Article
In: Computational Intelligence: An International Journal, vol. 34, pp. 124-144, 2018, ISSN: 1467-8640.
Abstract | Links | BibTeX | Tags:
@article{DBLP:journals/ci/FaniBZZD17,
  author    = {Fani, Hossein and Bagheri, Ebrahim and Zarrinkalam, Fattane and Zhao, Xin and Du, Weichang},
  title     = {Finding Diachronic Like-Minded Users},
  journal   = {Computational Intelligence: An International Journal},
  volume    = {34},
  pages     = {124--144},
  publisher = {Wiley Periodicals, Inc.},
  year      = {2018},
  date      = {2018-01-01},
  issn      = {1467-8640},
  doi       = {10.1111/coin.12117},
  url       = {http://onlinelibrary.wiley.com/doi/10.1111/coin.12117/full},
  abstract  = {User communities in social networks are usually identified by considering explicit structural social connections between users. While such communities can reveal important information about their members such as family or friendship ties and geographical proximity, just to name a few, they do not necessarily succeed at pulling likeminded users that share the same interests together. Therefore, researchers have explored the topical similarity of social content to build like-minded communities of users. In this paper, following the topic-based approaches, we are interested in identifying communities of users that share similar topical interests with similar temporal behavior. More specifically, we tackle the problem of identifying temporal (diachronic) topic-based communities, i.e., communities of users who have a similar temporal inclination towards emerging topics. To do so, we utilize multivariate time series analysis to model the contributions of each user towards emerging topics. Further, our modeling is completely agnostic to the topic detection method. We extract topics of interest by employing seminal topic detection methods; one graph-based and two LDA-based methods. Through our experiments on Twitter data, we demonstrate the effectiveness of our proposed temporal topic-based community detection method in the context of news recommendation, user prediction, and document timestamp prediction applications, compared to the non-temporal as well as the state-of-the-art temporal approaches.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
User communities in social networks are usually identified by considering explicit structural social connections between users. While such communities can reveal important information about their members such as family or friendship ties and geographical proximity, just to name a few, they do not necessarily succeed at pulling likeminded users that share the same interests together. Therefore, researchers have explored the topical similarity of social content to build like-minded communities of users. In this paper, following the topic-based approaches, we are interested in identifying communities of users that share similar topical interests with similar temporal behavior. More specifically, we tackle the problem of identifying temporal (diachronic) topic-based communities, i.e., communities of users who have a similar temporal inclination towards emerging topics. To do so, we utilize multivariate time series analysis to model the contributions of each user towards emerging topics. Further, our modeling is completely agnostic to the topic detection method. We extract topics of interest by employing seminal topic detection methods; one graph-based and two LDA-based methods. Through our experiments on Twitter data, we demonstrate the effectiveness of our proposed temporal topic-based community detection method in the context of news recommendation, user prediction, and document timestamp prediction applications, compared to the non-temporal as well as the state-of-the-art temporal approaches.
Close
http://onlinelibrary.wiley.com/doi/10.1111/coin.12117/full
doi:10.1111/coin.12117
Close
Pham, Ba'; Bagheri, Ebrahim; Rios, Patricia; Pourmasoumi, Asef; Robson, Reid C; Hwee, Jeremiah; Isaranuwatchai, Wanrudee; Darvesh, Nazia; Page, Matthew; Tricco, Andrea
Improving the Conduct of Systematic Reviews: A Process Mining Perspective Journal Article
In: Journal of Clinical Epidemiology, 2018.
Abstract | Links | BibTeX | Tags:
@article{jce2018,
  title     = {Improving the Conduct of Systematic Reviews: A Process Mining Perspective},
  author    = {Pham, Ba' and Bagheri, Ebrahim and Rios, Patricia and Pourmasoumi, Asef and Robson, Reid C and Hwee, Jeremiah and Isaranuwatchai, Wanrudee and Page, Matthew and Darvesh, Nazia and Tricco, Andrea},
  url       = {https://www.journals.elsevier.com/journal-of-clinical-epidemiology},
  year      = {2018},
  date      = {2018-01-01},
  journal   = {Journal of Clinical Epidemiology},
  abstract  = {Objectives: To demonstrate the feasibility of using process mining concepts, techniques, and tools to examine and improve the systematic review process. Study Design and Setting: We conducted a simulation study evaluating a process used by one research team over one year. The process was characterized using an event log of review activities, start/end dates for review tasks, reviewers, and person-hours spent on tasks. We obtained process models from mining event logs for visual display/animation/replay of review activities. We analyzed the social networks of reviewer interactions to discern how reviewers worked together. Key outcomes included review timelines and person-time. Results: The 12 reviews included in the study included an average of 3831 titles and abstracts (range:1565-6368) and 20 studies (6-42). The average time was 463 days (range: 289-629) [881 person-hours (range: 243-1752)] per review. The average person-hours on each step were: study selection 26%, data abstraction 24%, report preparation 23%, and meta-analysis 17%. Social network analyses showed that the team handled tasks according to their expected roles (e.g., methodologists developed review questions, librarians conducted searches, and review coordinators coordinated tasks). Conclusion: Process mining is valuable for review teams interested in improving and modernizing the conduct of systematic reviews.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Objectives: To demonstrate the feasibility of using process mining concepts, techniques, and tools to examine and improve the systematic review process. Study Design and Setting: We conducted a simulation study evaluating a process used by one research team over one year. The process was characterized using an event log of review activities, start/end dates for review tasks, reviewers, and person-hours spent on tasks. We obtained process models from mining event logs for visual display/animation/replay of review activities. We analyzed the social networks of reviewer interactions to discern how reviewers worked together. Key outcomes included review timelines and person-time. Results: The 12 reviews included in the study included an average of 3831 titles and abstracts (range:1565-6368) and 20 studies (6-42). The average time was 463 days (range: 289-629) [881 person-hours (range: 243-1752)] per review. The average person-hours on each step were: study selection 26%, data abstraction 24%, report preparation 23%, and meta-analysis 17%. Social network analyses showed that the team handled tasks according to their expected roles (e.g., methodologists developed review questions, librarians conducted searches, and review coordinators coordinated tasks). Conclusion: Process mining is valuable for review teams interested in improving and modernizing the conduct of systematic reviews.
Close
https://www.journals.elsevier.com/journal-of-clinical-epidemiology
Close
Zarrinkalam, Fattane; Kahani, Mohsen; Bagheri, Ebrahim
Mining User Interests over Active Topics on Social Networks Journal Article
In: Information Processing and Management, 2018.
Abstract | Links | BibTeX | Tags:
@article{IMP2017b,
  title     = {Mining User Interests over Active Topics on Social Networks},
  author    = {Zarrinkalam, Fattane and Kahani, Mohsen and Bagheri, Ebrahim},
  url       = {https://ls3.rnet.torontomu.ca/1-s2-0-s0306457317307070-main/},
  doi       = {10.1016/j.ipm.2017.12.003},
  year      = {2018},
  date      = {2018-01-01},
  urldate   = {2018-01-01},
  journal   = {Information Processing and Management},
  abstract  = {Inferring users' interests from their activities on social networks has been an emerging research topic in the recent years. Most existing approaches heavily rely on the explicit contributions (posts) of a user and overlook users' implicit interests, i.e., those potential user interests that the user did not explicitly mention but might have interest in. Given a set of active topics present in a social network in a specified time interval, our goal is to build an interest profile for a user over these topics by considering both explicit and implicit interests of the user. The reason for this is that the interests of free-riders and cold start users who constitute a large majority of social network users, cannot be directly identified from their explicit contributions to the social network. Specifically, to infer users' implicit interests, we propose a graph-based link prediction schema that operates over a representation model consisting of three types of information: user explicit contributions to topics, relationships between users, and the relatedness between topics. Through extensive experiments on different variants of our representation model and considering both homogeneous and heterogeneous link prediction, we investigate how topic relatedness and users' homophily relation impact the quality of inferring users' implicit interests. Comparison with state-of-the-art baselines on a real-world Twitter dataset demonstrates the effectiveness of our model in inferring users' interests in terms of perplexity and in the context of retweet prediction application. Moreover, we further show that the impact of our work is especially meaningful when considered in case of free-riders and cold start users.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Inferring users' interests from their activities on social networks has been an emerging research topic in the recent years. Most existing approaches heavily rely on the explicit contributions (posts) of a user and overlook users' implicit interests, i.e., those potential user interests that the user did not explicitly mention but might have interest in. Given a set of active topics present in a social network in a specified time interval, our goal is to build an interest profile for a user over these topics by considering both explicit and implicit interests of the user. The reason for this is that the interests of free-riders and cold start users who constitute a large majority of social network users, cannot be directly identified from their explicit contributions to the social network. Specifically, to infer users' implicit interests, we propose a graph-based link prediction schema that operates over a representation model consisting of three types of information: user explicit contributions to topics, relationships between users, and the relatedness between topics. Through extensive experiments on different variants of our representation model and considering both homogeneous and heterogeneous link prediction, we investigate how topic relatedness and users' homophily relation impact the quality of inferring users' implicit interests. Comparison with state-of-the-art baselines on a real-world Twitter dataset demonstrates the effectiveness of our model in inferring users' interests in terms of perplexity and in the context of retweet prediction application. Moreover, we further show that the impact of our work is especially meaningful when considered in case of free-riders and cold start users
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0306457317307070-main/
doi:10.1016/j.ipm.2017.12.003
Close
Lashkari, Fatemeh; Bagheri, Ebrahim; Ghorbani, Ali A
Neural Embedding-based Indices for Semantic Search Journal Article
In: Information Processing and Management, to appear, 2018.
Abstract | Links | BibTeX | Tags:
@article{ipm2018-fatemehl,
  title     = {Neural Embedding-based Indices for Semantic Search},
  author    = {Lashkari, Fatemeh and Bagheri, Ebrahim and Ghorbani, Ali A},
  url       = {https://ls3.rnet.torontomu.ca/1-s2-0-s0306457318302413-main_compressed/},
  doi       = {10.1016/j.ipm.2018.10.015},
  year      = {2018},
  date      = {2018-01-01},
  urldate   = {2018-01-01},
  journal   = {Information Processing and Management},
  note      = {to appear},
  abstract  = {Traditional information retrieval techniques that primarily rely on keyword-based linking of the query and document spaces face challenges such as the vocabulary mismatch problem where relevant documents to a given query might not be retrieved simply due to the use of different terminology for describing the same concepts. As such, semantic search techniques aim to address such limitations of keyword-based retrieval models by incorporating semantic information from standard knowledge bases such as Freebase and DBpedia. The literature has already shown that while the sole consideration of semantic information might not lead to improved retrieval performance over keyword-based search, their consideration enables the retrieval of a set of relevant documents that cannot be retrieved by keyword-based methods. As such, building indices that store and provide access to semantic information during the retrieval process is important. While the process for building and querying keyword-based indices is quite well understood, the incorporation of semantic information within search indices is still an open challenge. Existing work have proposed to build one unified index encompassing both textual and semantic information or to build separate yet integrated indices for each information type but they face limitations such as increased query process time. In this paper, we propose to use neural embeddings-based representations of term, semantic entity, semantic type and documents within the same embedding space to facilitate the development of a unified search index that would consist of these four information types. We perform experiments on standard and widely used document collections including Clueweb09-B and Robust04 to evaluate our proposed indexing strategy from both effectiveness and efficiency perspectives.
Based on our experiments, we find that when neural embeddings are used to build inverted indices; hence relaxing the requirement to explicitly observe the posting list key in the indexed document: (a) retrieval efficiency will increase compared to a standard inverted index, hence reduces the index size and query processing time, and (b) while retrieval efficiency, which is the main objective of an efficient indexing mechanism improves using our proposed method, retrieval effectiveness also retains competitive performance compared to the baseline in terms of retrieving a reasonable number of relevant documents from the indexed corpus.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Traditional information retrieval techniques that primarily rely on keyword-based linking of the query and document spaces face challenges such as the vocabulary mismatch problem where relevant documents to a given query might not be retrieved simply due to the use of different terminology for describing the same concepts. As such, semantic search techniques aim to address such limitations of keyword-based retrieval models by incorporating semantic information from standard knowledge bases such as Freebase and DBpedia. The literature has already shown that while the sole consideration of semantic information might not lead to improved retrieval performance over keyword-based search, their consideration enables the retrieval of a set of relevant documents that cannot be retrieved by keyword-based methods. As such, building indices that store and provide access to semantic information during the retrieval process is important. While the process for building and querying keyword-based indices is quite well understood, the incorporation of semantic information within search indices is still an open challenge. Existing work have proposed to build one unified index encompassing both textual and semantic information or to build separate yet integrated indices for each information type but they face limitations such as increased query process time. In this paper, we propose to use neural embeddings-based representations of term, semantic entity, semantic type and documents within the same embedding space to facilitate the development of a unified search index that would consist of these four information types. We perform experiments on standard and widely used document collections including Clueweb09-B and Robust04 to evaluate our proposed indexing strategy from both effectiveness and efficiency perspectives.
Based on our experiments, we find that when neural embeddings are used to build inverted indices; hence relaxing the requirement to explicitly observe the posting list key in the indexed document: (a) retrieval efficiency will increase compared to a standard inverted index, hence reduces the index size and query processing time, and (b) while retrieval efficiency, which is the main objective of an efficient indexing mechanism improves using our proposed method, retrieval effectiveness also retains competitive performance compared to the baseline in terms of retrieving a reasonable number of relevant documents from the indexed corpus.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0306457318302413-main_compressed/
doi:10.1016/j.ipm.2018.10.015
Close
Bagheri, Ebrahim; Ensan, Faezeh; Al-Obeidat, Feras
Neural Word and Entity Embeddings for Ad hoc Retrieval Journal Article
In: Information Processing and Management, vol. 54, pp. 339–357, 2018.
Abstract | Links | BibTeX | Tags:
@article{ipm2018-2,
  title     = {Neural Word and Entity Embeddings for Ad hoc Retrieval},
  author    = {Bagheri, Ebrahim and Ensan, Faezeh and Al-Obeidat, Feras},
  url       = {http://ls3.rnet.ryerson.ca/wiki/images/2/29/NeuralEmbeddings_IPM.pdf},
  doi       = {10.1016/j.ipm.2018.04.007},
  year      = {2018},
  date      = {2018-01-01},
  urldate   = {2018-01-01},
  journal   = {Information Processing and Management},
  volume    = {54},
  pages     = {339--357},
  abstract  = {Learning low dimensional dense representations of the vocabularies of a corpus, known as neural embeddings, has gained much attention in the information retrieval community. While there have been several successful attempts at integrating embeddings within the ad hoc document retrieval task, yet, no systematic study has been reported that explores the various aspects of neural embeddings and how they impact retrieval performance. In this paper, we perform a methodical study on how neural embeddings influence the ad hoc document retrieval task. More specifically, we systematically explore the following research questions: i) do methods solely based on neural embeddings perform competitively with state of the art retrieval methods with and without interpolation? ii) are there any statistically significant difference between the performance of retrieval models when based on word embeddings compared to when knowledge graph entity embeddings are used? and iii) is there significant difference between using locally trained neural embeddings compared to when globally trained neural embeddings are used? We examine these three research questions across both hard and all queries. Our study finds that word embeddings do not show competitive performance to any of the baselines. In contrast, entity embeddings show competitive performance to the baselines and when interpolated, outperform the best baselines for both hard and soft queries.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Learning low dimensional dense representations of the vocabularies of a corpus, known as neural embeddings, has gained much attention in the information retrieval community. While there have been several successful attempts at integrating embeddings within the ad hoc document retrieval task, yet, no systematic study has been reported that explores the various aspects of neural embeddings and how they impact retrieval performance. In this paper, we perform a methodical study on how neural embeddings influence the ad hoc document retrieval task. More specifically, we systematically explore the following research questions: i) do methods solely based on neural embeddings perform competitively with state of the art retrieval methods with and without interpolation? ii) are there any statistically significant difference between the performance of retrieval models when based on word embeddings compared to when knowledge graph entity embeddings are used? and iii) is there significant difference between using locally trained neural embeddings compared to when globally trained neural embeddings are used? We examine these three research questions across both hard and all queries. Our study finds that word embeddings do not show competitive performance to any of the baselines. In contrast, entity embeddings show competitive performance to the baselines and when interpolated, outperform the best baselines for both hard and soft queries.
Close
http://ls3.rnet.ryerson.ca/wiki/images/2/29/NeuralEmbeddings_IPM.pdf
doi:10.1016/j.ipm.2018.04.007
Close
Vigod, Simone N; Bagheri, Ebrahim; Zarrinkalam, Fattane; Brown, Hillary K; Mamdani, Muhammad; Ray, Joel G
Online social network response to studies on antidepressant use in pregnancy Journal Article
In: Journal of Psychosomatic Research, 2018.
Abstract | Links | BibTeX | Tags:
@article{psychosom18,
  title     = {Online social network response to studies on antidepressant use in pregnancy},
  author    = {Vigod, Simone N and Bagheri, Ebrahim and Zarrinkalam, Fattane and Brown, Hillary K and Mamdani, Muhammad and Ray, Joel G},
  url       = {https://ls3.rnet.torontomu.ca/1-s2-0-s0022399917312059-main_compressed/},
  doi       = {10.1016/j.jpsychores.2018.01.009},
  year      = {2018},
  date      = {2018-01-01},
  urldate   = {2018-01-01},
  journal   = {Journal of Psychosomatic Research},
  abstract  = {Background: About 8% of U.S women are prescribed antidepressant medications around the time of pregnancy. Decisions about medication use in pregnancy can be swayed by the opinion of family, friends and online media, sometimes beyond the advice offered by healthcare providers. Exploration of the online social network response to research on antidepressant use in pregnancy could provide insight about how to optimize decision-making in this complex area. Methods: For all 17 research articles published on the safety of antidepressant use in pregnancy in 2012, we sought to explore online social network activity regarding antidepressant use in pregnancy, via Twitter, in the 48 hours after a study was published, compared to the social network activity in the same period 1 week prior to each article’s publication. Results: Online social network activity about antidepressants in pregnancy quickly doubled upon study publication. The increased activity was driven by studies demonstrating harm associated with antidepressants, by lower-quality studies, and studies where abstracts presented relative versus absolute risks. Implications: These findings support a call for leadership from medical journals to consider how to best incentivize and support a balanced and clear translation of knowledge around antidepressant safety in pregnancy to their readership and the public.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Background: About 8% of U.S women are prescribed antidepressant medications around the time of pregnancy. Decisions about medication use in pregnancy can be swayed by the opinion of family, friends and online media, sometimes beyond the advice offered by healthcare providers. Exploration of the online social network response to research on antidepressant use in pregnancy could provide insight about how to optimize decision-making in this complex area. Methods: For all 17 research articles published on the safety of antidepressant use in pregnancy in 2012, we sought to explore online social network activity regarding antidepressant use in pregnancy, via Twitter, in the 48 hours after a study was published, compared to the social network activity in the same period 1 week prior to each article’s publication. Results: Online social network activity about antidepressants in pregnancy quickly doubled upon study publication. The increased activity was driven by studies demonstrating harm associated with antidepressants, by lower-quality studies, and studies where abstracts presented relative versus absolute risks. Implications: These findings support a call for leadership from medical journals to consider how to best incentivize and support a balanced and clear translation of knowledge around antidepressant safety in pregnancy to their readership and the public.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0022399917312059-main_compressed/
doi:10.1016/j.jpsychores.2018.01.009
Close
Khodabakhsh, Maryam; Kahani, Mohsen; Bagheri, Ebrahim
Predicting Future Personal Life Events on Twitter via Recurrent Neural Networks Journal Article
In: Journal of Intelligent Information Systems, 2018.
Abstract | Links | BibTeX | Tags:
@article{jiis2018b,
  title     = {Predicting Future Personal Life Events on Twitter via Recurrent Neural Networks},
  author    = {Khodabakhsh, Maryam and Kahani, Mohsen and Bagheri, Ebrahim},
  url       = {https://ls3.rnet.torontomu.ca/s10844-018-0519-2/},
  doi       = {10.1007/s10844-018-0519-2},
  year      = {2018},
  date      = {2018-01-01},
  urldate   = {2018-01-01},
  journal   = {Journal of Intelligent Information Systems},
  abstract  = {Social network users publicly share a wide variety of information with their followers and the general public ranging from their opinions, sentiments and personal life activities. There has already been significant advance in analyzing the shared information from both micro (individual user) and macro (community level) perspectives, giving access to actionable insight about user and community behaviors. The identification of personal life events from user's profiles is a challenging yet important task, which if done appropriately, would facilitate more accurate identification of users' preferences, interests and attitudes. For instance, a user who has just broken his phone, is likely to be upset and also be looking to purchase a new phone. While there is work that identifies tweets that include mentions of personal life events, our work in this paper goes beyond the state of the art by predicting a future personal life event that a user will be posting about on Twitter solely based on the past tweets. We propose two architectures based on recurrent neural networks, namely the classification and generation architectures, that determine the future personal life event of a user. We evaluate our work based on a gold standard Twitter life event dataset and compare our work with the state of the art baseline technique for life event detection. While presenting performance measures, we also discuss the limitations of our work in this paper.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Social network users publicly share a wide variety of information with their followers and the general public ranging from their opinions, sentiments and personal life activities. There has already been significant advance in analyzing the shared information from both micro (individual user) and macro (community level) perspectives, giving access to actionable insight about user and community behaviors. The identification of personal life events from user's profiles is a challenging yet important task, which if done appropriately, would facilitate more accurate identification of users' preferences, interests and attitudes. For instance, a user who has just broken his phone, is likely to be upset and also be looking to purchase a new phone. While there is work that identifies tweets that include mentions of personal life events, our work in this paper goes beyond the state of the art by predicting a future personal life event that a user will be posting about on Twitter solely based on the past tweets. We propose two architectures based on recurrent neural networks, namely the classification and generation architectures, that determine the future personal life event of a user. We evaluate our work based on a gold standard Twitter life event dataset and compare our work with the state of the art baseline technique for life event detection. While presenting performance measures, we also discuss the limitations of our work in this paper.
Close
https://ls3.rnet.torontomu.ca/s10844-018-0519-2/
doi:10.1007/s10844-018-0519-2
Close
Bashari, Mahdi; Bagheri, Ebrahim; Du, Weichang
Self-Adaptation of Service Compositions through Product Line Reconfiguration Journal Article
In: Journal of Systems and Software, vol. 114, pp. 84–105, 2018.
Abstract | Links | BibTeX | Tags:
@article{jss2018-1,
  title     = {Self-Adaptation of Service Compositions through Product Line Reconfiguration},
  author    = {Bashari, Mahdi and Bagheri, Ebrahim and Du, Weichang},
  url       = {https://ls3.rnet.torontomu.ca/1-s2-0-s0164121218301134-main_compressed/},
  doi       = {10.1016/j.jss.2018.05.069},
  year      = {2018},
  date      = {2018-01-01},
  urldate   = {2018-01-01},
  journal   = {Journal of Systems and Software},
  volume    = {114},
  pages     = {84--105},
  abstract  = {The large number of published services has motivated the development of tools for creating customized composite services known as service compositions. While service compositions provide high agility and development flexibility, they can also pose challenges when it comes to delivering guaranteed functional and non-functional requirements. This is primarily due to the highly dynamic environment in which services operate. In this paper, we focus on maintaining functional and non-functional requirements of service compositions within high variability domains, which are often modeled through software product line techniques. One of the main characteristics of high variability application domains is that they are rich in functional features whose configuration results in an instantiated functional application. We propose adaptation mechanisms that are able to effectively maintain functional and non-functional quality requirements in service compositions derived from software product lines. Unlike many existing work, the proposed adaptation mechanism does not require explicit user-defined adaptation strategies. We adopt concepts from the software product line engineering paradigm where service compositions are viewed as a collection of features and adaptation happens through product line reconfiguration. We have practically implemented the proposed mechanism in our Magus tool suite and performed extensive experiments, which show that our work is both practical and efficient for automatically adapting service compositions once violations of functional or non-functional requirements are observed.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
The large number of published services has motivated the development of tools for creating customized composite services known as service compositions. While service compositions provide high agility and development flexibility, they can also pose challenges when it comes to delivering guaranteed functional and non-functional requirements. This is primarily due to the highly dynamic environment in which services operate. In this paper, we focus on maintaining functional and non-functional requirements of service compositions within high variability domains, which are often modeled through software product line techniques. One of the main characteristics of high variability application domains is that they are rich in functional features whose configuration results in an instantiated functional application. We propose adaptation mechanisms that are able to effectively maintain functional and non-functional quality requirements in service compositions derived from software product lines. Unlike many existing work, the proposed adaptation mechanism does not require explicit user-defined adaptation strategies. We adopt concepts from the software product line engineering paradigm where service compositions are viewed as a collection of features and adaptation happens through product line reconfiguration. We have practically implemented the proposed mechanism in our Magus tool suite and performed extensive experiments, which show that our work is both practical and efficient for automatically adapting service compositions once violations of functional or non-functional requirements are observed.
Close
https://ls3.rnet.torontomu.ca/1-s2-0-s0164121218301134-main_compressed/
doi:10.1016/j.jss.2018.05.069
Close
Ali, Syed Muhammad; Noorian, Zeinab; Bagheri, Ebrahim; Ding, Chen; Al-Obeidat, Feras
Topic and Sentiment Aware Microblog Summarization for Twitter Journal Article
In: Journal of Intelligent Information Systems, 2018.
Abstract | Links | BibTeX | Tags:
@article{jiis2018c,
  title     = {Topic and Sentiment Aware Microblog Summarization for Twitter},
  author    = {Ali, Syed Muhammad and Noorian, Zeinab and Bagheri, Ebrahim and Ding, Chen and Al-Obeidat, Feras},
  url       = {https://ls3.rnet.torontomu.ca/s10844-018-0521-8/},
  doi       = {10.1007/s10844-018-0521-8},
  year      = {2018},
  date      = {2018-01-01},
  urldate   = {2018-01-01},
  journal   = {Journal of Intelligent Information Systems},
  abstract  = {Recent advances in microblog content summarization has primarily viewed this task in the context of traditional multi-document summarization techniques where a microblog post or their collection form one document. While these techniques already facilitate information aggregation, categorization and visualization of microblog posts, they fall short in two aspects: i) when summarizing a certain topic from microblog content, not all existing techniques take topic polarity into account. This is an important consideration in that the summarization of a topic should cover all aspects of the topic and hence taking polarity into account (sentiment) can lead to the inclusion of the less popular polarity in the summarization process. ii) Some summarization techniques produce summaries at the topic level. However, it is possible that a given topic can have more than one important aspect that need to have representation in the summarization process. Our work in this paper addresses these two challenges by considering both topic sentiments and topic aspects in tandem. We compare our work with the state of the art Twitter summarization techniques and show that our method is able to outperform existing methods on standard metrics such as ROUGE-1.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Recent advances in microblog content summarization has primarily viewed this task in the context of traditional multi-document summarization techniques where a microblog post or their collection form one document. While these techniques already facilitate information aggregation, categorization and visualization of microblog posts, they fall short in two aspects: i) when summarizing a certain topic from microblog content, not all existing techniques take topic polarity into account. This is an important consideration in that the summarization of a topic should cover all aspects of the topic and hence taking polarity into account (sentiment) can lead to the inclusion of the less popular polarity in the summarization process. ii) Some summarization techniques produce summaries at the topic level. However, it is possible that a given topic can have more than one important aspect that need to have representation in the summarization process. Our work in this paper addresses these two challenges by considering both topic sentiments and topic aspects in tandem. We compare our work with the state of the art Twitter summarization techniques and show that our method is able to outperform existing methods on standard metrics such as ROUGE-1.
Close
https://ls3.rnet.torontomu.ca/s10844-018-0521-8/
doi:10.1007/s10844-018-0521-8
Close
Cuzzola, John; Bagheri, Ebrahim; Jovanovic, Jelena
UMLS to DBPedia Link Discovery Through Circular Resolution Journal Article
In: Journal of the American Medical Informatics Association (JAMIA), 2018.
Abstract | Links | BibTeX | Tags:
@article{jamia18,
title = {UMLS to DBPedia Link Discovery Through Circular Resolution},
author = {John Cuzzola and Ebrahim Bagheri and Jelena Jovanovic},
url = {https://ls3.rnet.torontomu.ca/ocy021/},
doi = {10.1093/jamia/ocy021},
year = {2018},
date = {2018-01-01},
urldate = {2018-01-01},
journal = {Journal of the American Medical Informatics Association (JAMIA)},
abstract = {Objective: The goal of this work is to map UMLS concepts to DBpedia resources using widely accepted ontology relations including skos:exactMatch, skos:closeMatch, and rdfs:seeAlso, as a result of which a complete mapping from UMLS to DBpedia is made publicly available that includes 221,690 skos:exactMatch, 26,276 skos:closeMatch, and 6,784,322 rdfs:seeAlso mappings. Materials and Methods: We propose a method called circular resolution that utilizes a combination of semantic annotators to map UMLS concepts to DBpedia resources. A set of annotators annotate definitions of UMLS concepts returning DBpedia resources while another set performs annotation on DBpedia resource abstracts returning UMLS concepts. Our pipeline aligns these two sets of annotations to determine appropriate mappings from UMLS to DBpedia. Results: We evaluate our proposed method using structured data from the Wikidata knowledge base as the ground truth, which consists of 4,899 already existing UMLS to DBpedia mappings. Our results show an 83% recall with 77% precision-at-one (P@1) in mapping UMLS concepts to DBpedia resources on this testing set. Conclusion: The proposed circular resolution method is a simple yet effective technique for linking UMLS concepts to DBpedia resources. Experiments using Wikidata-based ground truth reveal a high mapping accuracy. In addition to the complete UMLS mapping downloadable in n-triple format, we provide an online browser and a RESTful service to explore the mappings.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Objective: The goal of this work is to map UMLS concepts to DBpedia resources using widely accepted ontology relations including skos:exactMatch, skos:closeMatch, and rdfs:seeAlso, as a result of which a complete mapping from UMLS to DBpedia is made publicly available that includes 221,690 skos:exactMatch, 26,276 skos:closeMatch, and 6,784,322 rdfs:seeAlso mappings. Materials and Methods: We propose a method called circular resolution that utilizes a combination of semantic annotators to map UMLS concepts to DBpedia resources. A set of annotators annotate definitions of UMLS concepts returning DBpedia resources while another set performs annotation on DBpedia resource abstracts returning UMLS concepts. Our pipeline aligns these two sets of annotations to determine appropriate mappings from UMLS to DBpedia. Results: We evaluate our proposed method using structured data from the Wikidata knowledge base as the ground truth, which consists of 4,899 already existing UMLS to DBpedia mappings. Our results show an 83% recall with 77% precision-at-one (P@1) in mapping UMLS concepts to DBpedia resources on this testing set. Conclusion: The proposed circular resolution method is a simple yet effective technique for linking UMLS concepts to DBpedia resources. Experiments using Wikidata-based ground truth reveal a high mapping accuracy. In addition to the complete UMLS mapping downloadable in n-triple format, we provide an online browser and a RESTful service to explore the mappings.
Close
https://ls3.rnet.torontomu.ca/ocy021/
doi:https://doi.org/10.1093/jamia/ocy021
Close
Zarrinkalam, Fattane; Kahani, Mohsen; Bagheri, Ebrahim
User Interest Prediction over Future Unobserved Topics on Social Networks Journal Article
In: Information Retrieval Journal, 2018.
Abstract | Links | BibTeX | Tags:
@article{inrt2018,
title = {User Interest Prediction over Future Unobserved Topics on Social Networks},
author = {Fattane Zarrinkalam and Mohsen Kahani and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/s10791-018-9337-y/},
doi = {10.1007/s10791-018-9337-y},
year = {2018},
date = {2018-01-01},
urldate = {2018-01-01},
journal = {Information Retrieval Journal},
abstract = {The accurate prediction of users' future interests on social networks allows one to perform future planning by studying how users will react if certain topics emerge in the future. It can improve areas such as targeted advertising and the efficient delivery of services. Despite the importance of predicting user future interests on social networks, existing works mainly focus on identifying user current interests and little work has been done on the prediction of user potential interests in the future. There have been work that attempt to identify a user future interests, however they cannot predict user interests with regard to new topics since these topics have never received any feedback from users in the past. In this paper, we propose a framework that works on the basis of temporal evolution of user interests and utilizes semantic information from knowledge bases such as Wikipedia to predict user future interests and overcome the cold item problem. Through extensive experiments on a real-world Twitter dataset, we demonstrate the effectiveness of our approach in predicting future interests of users compared to state-of-the-art baselines. Moreover, we further show that the impact of our work is especially meaningful when considered in case of cold items.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
The accurate prediction of users' future interests on social networks allows one to perform future planning by studying how users will react if certain topics emerge in the future. It can improve areas such as targeted advertising and the efficient delivery of services. Despite the importance of predicting user future interests on social networks, existing works mainly focus on identifying user current interests and little work has been done on the prediction of user potential interests in the future. There have been work that attempt to identify a user future interests, however they cannot predict user interests with regard to new topics since these topics have never received any feedback from users in the past. In this paper, we propose a framework that works on the basis of temporal evolution of user interests and utilizes semantic information from knowledge bases such as Wikipedia to predict user future interests and overcome the cold item problem. Through extensive experiments on a real-world Twitter dataset, we demonstrate the effectiveness of our approach in predicting future interests of users compared to state-of-the-art baselines. Moreover, we further show that the impact of our work is especially meaningful when considered in case of cold items.
Close
https://ls3.rnet.torontomu.ca/s10791-018-9337-y/
doi:https://doi.org/10.1007/s10791-018-9337-y
Close
Arabzadeh, Negar; Fani, Hossein; Zarrinkalam, Fattane; Navivala, Ahmed; Bagheri, Ebrahim
Causal Dependencies for Future Interest Prediction on Twitter Proceedings Article
In: The 27th ACM International Conference on Information and Knowledge Management (CIKM 2018), 2018.
Abstract | Links | BibTeX | Tags:
@inproceedings{cikm18-2,
title = {Causal Dependencies for Future Interest Prediction on Twitter},
author = {Negar Arabzadeh and Hossein Fani and Fattane Zarrinkalam and Ahmed Navivala and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/3269206-3269312/},
doi = {10.1145/3269206.3269312},
year = {2018},
date = {2018-01-01},
urldate = {2018-01-01},
booktitle = {The 27th ACM International Conference on Information and Knowledge Management (CIKM 2018)},
abstract = {The accurate prediction of users' future topics of interests on social networks can facilitate content recommendation and platform engagement. However, researchers have found that future interest prediction, especially on social networks such as Twitter, is quite challenging due to the rapid changes in community topics and evolution of user interactions. In this context, temporal collaborative filtering methods have already been used to perform user interest prediction, which benefit from similar user behavioral patterns over time to predict how a user's interests might evolve in the future. In this paper, we propose that instead of considering the whole user base within a collaborative filtering framework to predict user interests, it is possible to much more accurately predict such interests by only considering the behavioral patterns of the most influential users related to the user of interest. We model influence as a form of causal dependency between users. To this end, we employ the concept of Granger causality to identify causal dependencies. We show through extensive experimentation that the consideration of only one causally dependent user leads to much more accurate prediction of users' future interests in a host of measures including ranking and rating accuracy metrics.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
The accurate prediction of users' future topics of interests on social networks can facilitate content recommendation and platform engagement. However, researchers have found that future interest prediction, especially on social networks such as Twitter, is quite challenging due to the rapid changes in community topics and evolution of user interactions. In this context, temporal collaborative filtering methods have already been used to perform user interest prediction, which benefit from similar user behavioral patterns over time to predict how a user's interests might evolve in the future. In this paper, we propose that instead of considering the whole user base within a collaborative filtering framework to predict user interests, it is possible to much more accurately predict such interests by only considering the behavioral patterns of the most influential users related to the user of interest. We model influence as a form of causal dependency between users. To this end, we employ the concept of Granger causality to identify causal dependencies. We show through extensive experimentation that the consideration of only one causally dependent user leads to much more accurate prediction of users' future interests in a host of measures including ranking and rating accuracy metrics.

Close
https://ls3.rnet.torontomu.ca/3269206-3269312/
doi:https://doi.org/10.1145/3269206.3269312
Close
Bagheri, Ebrahim; Ensan, Faezeh; Al-Obeidat, Feras
Impact of Document Representation on Neural Ad hoc Retrieval Proceedings Article
In: The 27th ACM International Conference on Information and Knowledge Management (CIKM 2018), 2018.
Abstract | Links | BibTeX | Tags:
@inproceedings{cikm18-1,
title = {Impact of Document Representation on Neural Ad hoc Retrieval},
author = {Ebrahim Bagheri and Faezeh Ensan and Feras Al-Obeidat},
url = {https://ls3.rnet.torontomu.ca/3269206-3269314/},
doi = {10.1145/3269206.3269314},
year = {2018},
date = {2018-01-01},
urldate = {2018-01-01},
booktitle = {The 27th ACM International Conference on Information and Knowledge Management (CIKM 2018)},
abstract = {Neural embeddings have been effectively integrated into information retrieval tasks including ad hoc retrieval. One of the benefits of neural embeddings is they allow for the calculation of the similarity between queries and documents through vector similarity calculation methods. While such methods have been effective for document matching, they have an inherent bias towards documents that are sized relatively similarly. Therefore, the difference between the query and document lengths, referred to as the query-document size imbalance problem, becomes an issue when incorporating neural embeddings and their associated similarity calculation models into the ad hoc document retrieval process. In this paper, we propose that document representation methods need to be used to address the size imbalance problem and empirically show their impact on the performance of neural embedding-based ad hoc retrieval. In addition, we explore several types of document representation methods and investigate their impact on the retrieval process. We conduct our experiments on three widely used standard corpora, namely Clueweb09B, Clueweb12B and Robust04 and their associated topics. Summarily, we find that document representation methods are able to effectively address the query-document size imbalance problem and significantly improve the performance of neural ad hoc retrieval. In addition, we find that a document representation method based on a simple term-frequency shows significantly better performance compared to more sophisticated representation methods such as neural composition and aspect-based methods.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Neural embeddings have been effectively integrated into information retrieval tasks including ad hoc retrieval. One of the benefits of neural embeddings is they allow for the calculation of the similarity between queries and documents through vector similarity calculation methods. While such methods have been effective for document matching, they have an inherent bias towards documents that are sized relatively similarly. Therefore, the difference between the query and document lengths, referred to as the query-document size imbalance problem, becomes an issue when incorporating neural embeddings and their associated similarity calculation models into the ad hoc document retrieval process. In this paper, we propose that document representation methods need to be used to address the size imbalance problem and empirically show their impact on the performance of neural embedding-based ad hoc retrieval. In addition, we explore several types of document representation methods and investigate their impact on the retrieval process. We conduct our experiments on three widely used standard corpora, namely Clueweb09B, Clueweb12B and Robust04 and their associated topics. Summarily, we find that document representation methods are able to effectively address the query-document size imbalance problem and significantly improve the performance of neural ad hoc retrieval. In addition, we find that a document representation method based on a simple term-frequency shows significantly better performance compared to more sophisticated representation methods such as neural composition and aspect-based methods.

Close
https://ls3.rnet.torontomu.ca/3269206-3269314/
doi:https://doi.org/10.1145/3269206.3269314
Close
Hosseini, Hawre; Nguyen, Tam T; Bagheri, Ebrahim
Implicit Entity Linking through Ad-hoc Retrieval Proceedings Article
In: IEEE/ACM International Conference on Social Networks Analysis and Mining (ASONAM 2018), 2018.
Abstract | Links | BibTeX | Tags:
@inproceedings{asonam2018,
title = {Implicit Entity Linking through Ad-hoc Retrieval},
author = {Hawre Hosseini and Tam T Nguyen and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/implicit_entity_linking_through_ad-hoc_retrieval/},
doi = {10.1109/ASONAM.2018.8508612},
year = {2018},
date = {2018-01-01},
urldate = {2018-01-01},
booktitle = {IEEE/ACM International Conference on Social Networks Analysis and Mining (ASONAM 2018)},
abstract = {The systematic linking of explicitly-observed phrases within a document to entities of a knowledge base has already been explored in a process known as entity linking. The objective of this paper, however, is to identify and entity link those entities that are not mentioned but are implied within a document, more specifically within a tweet. This process is referred to as implicit entity linking. Unlike prior work that build a representation for each entity based on its related content in the knowledge base, we propose to perform implicit entity linking by determining how a tweet is related to user-generated content posted online and as such indirectly perform entity linking. We formulate this problem as an ad-hoc document retrieval process where the input query is the tweet, which needs to be implicitly linked and the document space is the set of user-generated content related to the entities of the knowledge base. We systematically compare our work with the state-of-the-art baseline and show that our method is able to provide statistically significant improvements.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
The systematic linking of explicitly-observed phrases within a document to entities of a knowledge base has already been explored in a process known as entity linking. The objective of this paper, however, is to identify and entity link those entities that are not mentioned but are implied within a document, more specifically within a tweet. This process is referred to as implicit entity linking. Unlike prior work that build a representation for each entity based on its related content in the knowledge base, we propose to perform implicit entity linking by determining how a tweet is related to user-generated content posted online and as such indirectly perform entity linking. We formulate this problem as an ad-hoc document retrieval process where the input query is the tweet, which needs to be implicitly linked and the document space is the set of user-generated content related to the entities of the knowledge base. We systematically compare our work with the state-of-the-art baseline and show that our method is able to provide statistically significant improvements.

Close
https://ls3.rnet.torontomu.ca/implicit_entity_linking_through_ad-hoc_retrieval/
doi:https://doi.org/10.1109/ASONAM.2018.8508612
Close
Pourali, Alireza; Zarrinkalam, Fattane; Bagheri, Ebrahim
Point-of-Interest Recommendation Using Heterogeneous Link Prediction Proceedings Article
In: The International Conference on Extending Database Technology (EDBT), March 26-29, Vienna, Austria, 2018.
Abstract | Links | BibTeX | Tags:
@inproceedings{edbt2018,
  author     = {Alireza Pourali and Fattane Zarrinkalam and Ebrahim Bagheri},
  title      = {Point-of-Interest Recommendation Using Heterogeneous Link Prediction},
  booktitle  = {The International Conference on Extending Database Technology (EDBT), March 26-29, Vienna, Austria},
  year       = {2018},
  date       = {2018-01-01},
  url        = {https://ls3.rnet.torontomu.ca/paper-320/},
  urldate    = {2018-01-01},
  abstract   = {Venue recommendation in location-based social networks is among the more important tasks that enhances user participation on the social network. Despite its importance, earlier research have shown that the accurate recommendation of appropriate venues for users is a difficult task specially given the highly sparse nature of user check-in information. In this paper, we show how a comprehensive set of user and venue related information can be methodically incorporated into a heterogeneous graph representation based on which the problem of venue recommendation can be viewed as an instance of the heterogeneous link prediction over the graph. We systematically compare our proposed approach with several strong baselines and show that our work that is computationally less-intensive compared to the baselines shows improved performance in terms of precision and f-measure.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}

Close
Venue recommendation in location-based social networks is among the more important tasks that enhances user participation on the social network. Despite its importance, earlier research have shown that the accurate recommendation of appropriate venues for users is a difficult task specially given the highly sparse nature of user check-in information. In this paper, we show how a comprehensive set of user and venue related information can be methodically incorporated into a heterogeneous graph representation based on which the problem of venue recommendation can be viewed as an instance of the heterogeneous link prediction over the graph. We systematically compare our proposed approach with several strong baselines and show that our work that is computationally less-intensive compared to the baselines shows improved performance in terms of precision and f-measure.
Close
https://ls3.rnet.torontomu.ca/paper-320/
Close
Khodabakhsh, Maryam; Fani, Hossein; Zarrinkalam, Fattane; Bagheri, Ebrahim
Predicting Personal Life Events from Streaming Social Content Proceedings Article
In: The 27th ACM International Conference on Information and Knowledge Management (CIKM 2018), 2018.
Abstract | Links | BibTeX | Tags:
@inproceedings{cikm18-3,
title = {Predicting Personal Life Events from Streaming Social Content},
author = {Maryam Khodabakhsh and Hossein Fani and Fattane Zarrinkalam and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/3269206-3269313/},
doi = {10.1145/3269206.3269313},
year = {2018},
date = {2018-01-01},
urldate = {2018-01-01},
booktitle = {The 27th ACM International Conference on Information and Knowledge Management (CIKM 2018)},
abstract = {Researchers have shown that it is possible to identify reported instances of personal life events from users' social content, e.g., tweets. This is known as personal life event detection. In this paper, we take a step forward and explore the possibility of predicting users' next personal life event based solely on their historically reported personal life events, a task which we refer to as personal life event prediction. We present a framework for modeling streaming social content for the purpose of personal life event prediction and describe how various instantiations of the framework can be developed to build a life event prediction model. In our extensive experiments, we find that (i) historical personal life events of a user have strong predictive power for determining the user's future life event; (ii) the consideration of sequence in historically reported personal life events shows inferior performance compared to models that do not consider sequence, and (iii) the number of historical life events and the length of the past time intervals that are taken into account for making life event predictions can impact prediction performance whereby more recent life events show more relevance for the prediction of future life events.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Researchers have shown that it is possible to identify reported instances of personal life events from users' social content, e.g., tweets. This is known as personal life event detection. In this paper, we take a step forward and explore the possibility of predicting users' next personal life event based solely on their historically reported personal life events, a task which we refer to as personal life event prediction. We present a framework for modeling streaming social content for the purpose of personal life event prediction and describe how various instantiations of the framework can be developed to build a life event prediction model. In our extensive experiments, we find that (i) historical personal life events of a user have strong predictive power for determining the user's future life event; (ii) the consideration of sequence in historically reported personal life events shows inferior performance compared to models that do not consider sequence, and (iii) the number of historical life events and the length of the past time intervals that are taken into account for making life event predictions can impact prediction performance whereby more recent life events show more relevance for the prediction of future life events.

Close
https://ls3.rnet.torontomu.ca/3269206-3269313/
doi:https://doi.org/10.1145/3269206.3269313
Close
Fani, Hossein; Bashari, Masoud; Zarrinkalam, Fattane; Bagheri, Ebrahim; Al-Obeidat, Feras
Stopword Detection for Streaming Content Proceedings Article
In: Advances in Information Retrieval: 40th European Conference on IR Research, ECIR 2018, Grenoble, France, March 26-29, 2018, Proceedings, 2018.
Abstract | Links | BibTeX | Tags:
@inproceedings{ecir2018b,
title = {Stopword Detection for Streaming Content},
author = {Hossein Fani and Masoud Bashari and Fattane Zarrinkalam and Ebrahim Bagheri and Feras Al-Obeidat},
url = {https://ls3.rnet.torontomu.ca/wp-content/uploads/2024/04/107720070.pdf},
doi = {10.1007/978-3-319-76941-7_70},
year = {2018},
date = {2018-01-01},
urldate = {2018-01-01},
booktitle = {Advances in Information Retrieval: 40th European Conference on IR Research, ECIR 2018, Grenoble, France, March 26-29, 2018, Proceedings},
abstract = {The removal of stopwords is an important preprocessing step in many natural language processing tasks, which can lead to enhanced performance and execution time. Many existing methods either rely on a predefined list of stopwords or compute word significance based on metrics such as tf-idf. The objective of our work in this paper is to identify stopwords, in an unsupervised way, for streaming textual corpora such as Twitter, which have a temporal nature. We propose to consider and model the dynamics of a word within the streaming corpus to identify the ones that are less likely to be informative or discriminative. Our work is based on the discrete wavelet transform (DWT) of word signals in order to extract two features, namely scale and energy. We show that our proposed approach is effective in identifying stopwords and improves the quality of topics in the task of topic detection.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
The removal of stopwords is an important preprocessing step in many natural language processing tasks, which can lead to enhanced performance and execution time. Many existing methods either rely on a predefined list of stopwords or compute word significance based on metrics such as tf-idf. The objective of our work in this paper is to identify stopwords, in an unsupervised way, for streaming textual corpora such as Twitter, which have a temporal nature. We propose to consider and model the dynamics of a word within the streaming corpus to identify the ones that are less likely to be informative or discriminative. Our work is based on the discrete wavelet transform (DWT) of word signals in order to extract two features, namely scale and energy. We show that our proposed approach is effective in identifying stopwords and improves the quality of topics in the task of topic detection.

Close
https://ls3.rnet.torontomu.ca/wp-content/uploads/2024/04/107720070.pdf
doi:https://doi.org/10.1007/978-3-319-76941-7_70
Close
Falavarjani, Seyed Amin Mirlohi; Hosseini, Hawre; Bagheri, Ebrahim
The Impact of Foursquare Checkins on Users' Emotions on Twitter Proceedings Article
In: International Workshop on Social Aspects in Personalization and Search collocated with the 40th European Conference on IR Research, ECIR 2018, Grenoble, France, March 26-29, 2018, 2018.
Abstract | Links | BibTeX | Tags:
@inproceedings{ecir2018c,
title = {The Impact of Foursquare Checkins on Users' Emotions on Twitter},
author = {Seyed Amin Mirlohi Falavarjani and Hawre Hosseini and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/wp-content/uploads/2024/04/978-3-030-52485-2-1-154-162.pdf},
doi = {10.1007/978-3-030-52485-2_13},
year = {2018},
date = {2018-01-01},
urldate = {2018-01-01},
booktitle = {International Workshop on Social Aspects in Personalization and Search collocated with the 40th European Conference on IR Research, ECIR 2018, Grenoble, France, March 26-29, 2018},
abstract = {Performing observational studies based on social network content has recently gained attraction where the impact of various types of interruptions has been studied on users’ behavior. There has been recent work that have focused on how online social network behavior and activity can impact users’ offline behavior. In this paper, we study the inverse where we focus on whether users’ offline behavior captured through their check-ins at different venues on Foursquare can impact users’ online emotion expression as depicted in their tweets. We show that users’ offline activity can impact users’ online emotions; however, the type of activity determines the extent to which a user’s emotions will be impacted.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Performing observational studies based on social network content has recently gained attraction where the impact of various types of interruptions has been studied on users’ behavior. There has been recent work that have focused on how online social network behavior and activity can impact users’ offline behavior. In this paper, we study the inverse where we focus on whether users’ offline behavior captured through their check-ins at different venues on Foursquare can impact users’ online emotion expression as depicted in their tweets. We show that users’ offline activity can impact users’ online emotions; however, the type of activity determines the extent to which a user’s emotions will be impacted.

Close
https://ls3.rnet.torontomu.ca/wp-content/uploads/2024/04/978-3-030-52485-2-1-154-162.pdf
doi:https://doi.org/10.1007/978-3-030-52485-2_13
Close
Trikha, Anil Kumar; Zarrinkalam, Fattane; Bagheri, Ebrahim
Topic-Association Mining for User Interest Detection Proceedings Article
In: Advances in Information Retrieval: 40th European Conference on IR Research, ECIR 2018, Grenoble, France, March 26-29, 2018, Proceedings, 2018.
Abstract | Links | BibTeX | Tags:
@inproceedings{ecir2018a,
title = {Topic-Association Mining for User Interest Detection},
author = {Anil Kumar Trikha and Fattane Zarrinkalam and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/ecir2018/},
doi = {10.1007/978-3-319-76941-7_60},
year = {2018},
date = {2018-01-01},
urldate = {2018-01-01},
booktitle = {Advances in Information Retrieval: 40th European Conference on IR Research, ECIR 2018, Grenoble, France, March 26-29, 2018, Proceedings},
abstract = {The accurate identification of user interests on Twitter can lead to more efficient procurement of targeted content for the users. While the analysis of user content has engaged with on Twitter is a rich source for detecting the user’s interests, prior research have shown that it may not be sufficient. There have been work that attempt to identify a user’s implicit interests, i.e., those topics that could interest the user but the user has not engaged with them in the past. Prior work has shown that topic semantic relatedness is an important feature for determining users’ implicit interests. In this paper, we explore the possibility of identifying users’ implicit interests solely based on topic association through frequent pattern mining without regard for the semantics of the topics. We show in our experiments that topic association is a strong feature for determining users’ implicit interests.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
The accurate identification of user interests on Twitter can lead to more efficient procurement of targeted content for the users. While the analysis of user content has engaged with on Twitter is a rich source for detecting the user’s interests, prior research have shown that it may not be sufficient. There have been work that attempt to identify a user’s implicit interests, i.e., those topics that could interest the user but the user has not engaged with them in the past. Prior work has shown that topic semantic relatedness is an important feature for determining users’ implicit interests. In this paper, we explore the possibility of identifying users’ implicit interests solely based on topic association through frequent pattern mining without regard for the semantics of the topics. We show in our experiments that topic association is a strong feature for determining users’ implicit interests.

Close
https://ls3.rnet.torontomu.ca/ecir2018/
doi:https://doi.org/10.1007/978-3-319-76941-7_60
Close
2017
Bashari, Mahdi; Bagheri, Ebrahim; Du, Weichang
Dynamic software product line engineering: a reference framework Journal Article
In: International Journal of Software Engineering and Knowledge Engineering, vol. 27, pp. 191–234, 2017.
Abstract | Links | BibTeX | Tags:
@article{ijseke2016,
  title     = {Dynamic software product line engineering: a reference framework},
  author    = {Mahdi Bashari and Ebrahim Bagheri and Weichang Du},
  journal   = {International Journal of Software Engineering and Knowledge Engineering},
  volume    = {27},
  pages     = {191--234},
  publisher = {World Scientific Publishing Company},
  year      = {2017},
  date      = {2017-01-01},
  doi       = {10.1142/S0218194017500085},
  url       = {https://ls3.rnet.torontomu.ca/bashari-et-al-2017-dynamic-software-product-line-engineering-a-reference-framework/},
  urldate   = {2017-01-01},
  abstract  = {Runtime adaptive systems are able to dynamically transform their internal structure, and hence their behavior, in response to internal or external changes. Such transformations provide the basis for new functionalities or improvements of the non-functional properties that match operational requirements and standards. Software Product Line Engineering (SPLE) has introduced several models and mechanisms for variability modeling and management. Dynamic software product lines (DSPL) engineering exploits the knowledge acquired in SPLE to develop systems that can be context-aware, post-deployment reconfigurable, or runtime adaptive. This paper focuses on DSPL engineering approaches for developing runtime adaptive systems and proposes a framework for classifying and comparing these approaches from two distinct perspectives: adaptation properties and adaptation realization. These two perspectives are linked together by a series of guidelines that help select a suitable adaptation realization approach based on desired adaptation types.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Runtime adaptive systems are able to dynamically transform their internal structure, and hence their behavior, in response to internal or external changes. Such transformations provide the basis for new functionalities or improvements of the non-functional properties that match operational requirements and standards. Software Product Line Engineering (SPLE) has introduced several models and mechanisms for variability modeling and management. Dynamic software product lines (DSPL) engineering exploits the knowledge acquired in SPLE to develop systems that can be context-aware, post-deployment reconfigurable, or runtime adaptive. This paper focuses on DSPL engineering approaches for developing runtime adaptive systems and proposes a framework for classifying and comparing these approaches from two distinct perspectives: adaptation properties and adaptation realization. These two perspectives are linked together by a series of guidelines that help select a suitable adaptation realization approach based on desired adaptation types.
Close
https://ls3.rnet.torontomu.ca/bashari-et-al-2017-dynamic-software-product-line-e[...]
doi:10.1142/S0218194017500085
Close
Pourgholamali, Fatemeh; Kahani, Mohsen; Bagheri, Ebrahim; Noorian, Zeinab
Embedding Unstructured Side Information in Product Recommendation Journal Article
In: Electronic Commerce Research and Applications, vol. 25, pp. 70–85, 2017.
Links | BibTeX | Tags:
@article{ECRA2017,
  title     = {Embedding Unstructured Side Information in Product Recommendation},
  author    = {Fatemeh Pourgholamali and Mohsen Kahani and Ebrahim Bagheri and Zeinab Noorian},
  journal   = {Electronic Commerce Research and Applications},
  volume    = {25},
  pages     = {70--85},
  year      = {2017},
  date      = {2017-01-01},
  doi       = {10.1016/j.elerap.2017.08.001},
  url       = {https://ls3.rnet.torontomu.ca/j-elerap-2017-08-001/},
  urldate   = {2017-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
https://ls3.rnet.torontomu.ca/j-elerap-2017-08-001/
doi:10.1016/j.elerap.2017.08.001
Close
Keikha, Andisheh; Ensan, Faezeh; Bagheri, Ebrahim
Query Expansion Using Pseudo Relevance Feedback on Wikipedia Journal Article
In: Journal of Intelligent Information Systems, 2017.
Abstract | Links | BibTeX | Tags:
@article{jiis2017,
  title     = {Query Expansion Using Pseudo Relevance Feedback on {Wikipedia}},
  author    = {Andisheh Keikha and Faezeh Ensan and Ebrahim Bagheri},
  journal   = {Journal of Intelligent Information Systems},
  year      = {2017},
  date      = {2017-01-01},
  doi       = {10.1007/s10844-017-0466-3},
  url       = {https://ls3.rnet.torontomu.ca/s10844-017-0466-3-1/},
  urldate   = {2017-01-01},
  abstract  = {One of the major challenges in Web search pertains to the correct interpretation of users’ intent. Query Expansion is one of the well-known approaches for determining the intent of the user by addressing the vocabulary mismatch problem. A limitation of the current query expansion approaches is that the relations between the query terms and the expanded terms is limited. In this paper, we capture users’ intent through query expansion. We build on earlier work in the area by adopting a pseudo-relevance feedback approach; however, we advance the state of the art by proposing an approach for feature learning within the process of query expansion. In our work, we specifically consider the Wikipedia corpus as the feedback collection space and identify the best features within this context for term selection in two supervised and unsupervised models. We compare our work with state of the art query expansion techniques, the results of which show promising robustness and improved precision.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
One of the major challenges in Web search pertains to the correct interpretation of users’ intent. Query Expansion is one of the well-known approaches for determining the intent of the user by addressing the vocabulary mismatch problem. A limitation of the current query expansion approaches is that the relations between the query terms and the expanded terms is limited. In this paper, we capture users’ intent through query expansion. We build on earlier work in the area by adopting a pseudo-relevance feedback approach; however, we advance the state of the art by proposing an approach for feature learning within the process of query expansion. In our work, we specifically consider the Wikipedia corpus as the feedback collection space and identify the best features within this context for term selection in two supervised and unsupervised models. We compare our work with state of the art query expansion techniques, the results of which show promising robustness and improved precision.
Close
https://ls3.rnet.torontomu.ca/s10844-017-0466-3-1/
doi:10.1007/s10844-017-0466-3
Close
Cuzzola, John; Jovanovic, Jelena; Bagheri, Ebrahim
RysannMD: A Biomedical Semantic Annotator Balancing Speed and Accuracy Journal Article
In: Journal of Biomedical Informatics, 2017.
Abstract | Links | BibTeX | Tags:
@article{JBI2017,
  title     = {{RysannMD}: A Biomedical Semantic Annotator Balancing Speed and Accuracy},
  author    = {John Cuzzola and Jelena Jovanovic and Ebrahim Bagheri},
  journal   = {Journal of Biomedical Informatics},
  year      = {2017},
  date      = {2017-01-01},
  doi       = {10.1016/j.jbi.2017.05.016},
  url       = {https://ls3.rnet.torontomu.ca/j-jbi-2017-05-016/},
  urldate   = {2017-01-01},
  abstract  = {Recently, both researchers and practitioners have explored the possibility of semantically annotating large and continuously evolving collections of biomedical texts such as research papers, medical reports, and physician notes in order to enable their efficient and effective management and use in clinical practice or research laboratories. Such annotations can be automatically generated by biomedical semantic annotators - tools that are specifically designed for detecting and disambiguating biomedical concepts mentioned in text. The biomedical community has already presented several solid automated semantic annotators. However, the existing tools are either strong in their disambiguation capacity, i.e., the ability to identify the correct biomedical concept for a given piece of text among several candidate concepts, or they excel in their processing time, i.e., work very efficiently, but none of the semantic annotation tools reported in the literature has both of these qualities. In this paper, we present RysannMD (Ryerson Semantic Annotator for Medical Domain), a biomedical semantic annotation tool that strikes a balance between processing time and performance while disambiguating biomedical terms. In other words, RysannMD provides reasonable disambiguation performance when choosing the right sense for a biomedical term in a given context, and does that in a reasonable time. To examine how RysannMD stands with respect to the state of the art biomedical semantic annotators, we have conducted a series of experiments using standard benchmarking corpora, including both gold and silver standards, and four modern biomedical semantic annotators, namely cTAKES, MetaMap, NOBLE Coder, and Neji. The annotators were compared with respect to the quality of the produced annotations measured against gold and silver standards using precision, recall, and F1 measure and speed, i.e., processing time.
In the experiments, RysannMD achieved the best median F1 measure across the benchmarking corpora, independent of the standard used (silver/gold), biomedical subdomain, and document size. In terms of the annotation speed, RysannMD scored the second best median processing time across all the experiments. The obtained results indicate that RysannMD offers the best performance among the examined semantic annotators when both quality of annotation and speed are considered simultaneously.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Recently, both researchers and practitioners have explored the possibility of semantically annotating large and continuously evolving collections of biomedical texts such as research papers, medical reports, and physician notes in order to enable their efficient and effective management and use in clinical practice or research laboratories. Such annotations can be automatically generated by biomedical semantic annotators - tools that are specifically designed for detecting and disambiguating biomedical concepts mentioned in text. The biomedical community has already presented several solid automated semantic annotators. However, the existing tools are either strong in their disambiguation capacity, i.e., the ability to identify the correct biomedical concept for a given piece of text among several candidate concepts, or they excel in their processing time, i.e., work very efficiently, but none of the semantic annotation tools reported in the literature has both of these qualities. In this paper, we present RysannMD (Ryerson Semantic Annotator for Medical Domain), a biomedical semantic annotation tool that strikes a balance between processing time and performance while disambiguating biomedical terms. In other words, RysannMD provides reasonable disambiguation performance when choosing the right sense for a biomedical term in a given context, and does that in a reasonable time. To examine how RysannMD stands with respect to the state of the art biomedical semantic annotators, we have conducted a series of experiments using standard benchmarking corpora, including both gold and silver standards, and four modern biomedical semantic annotators, namely cTAKES, MetaMap, NOBLE Coder, and Neji. The annotators were compared with respect to the quality of the produced annotations measured against gold and silver standards using precision, recall, and F1 measure and speed, i.e., processing time. 
In the experiments, RysannMD achieved the best median F1 measure across the benchmarking corpora, independent of the standard used (silver/gold), biomedical subdomain, and document size. In terms of the annotation speed, RysannMD scored the second best median processing time across all the experiments. The obtained results indicate that RysannMD offers the best performance among the examined semantic annotators when both quality of annotation and speed are considered simultaneously.
Close
https://ls3.rnet.torontomu.ca/j-jbi-2017-05-016/
doi:10.1016/j.jbi.2017.05.016
Close
Vo, Duc-Thuan; Bagheri, Ebrahim
Self-Training on Refined Clause Patterns for Relation Extraction Journal Article
In: Information Processing and Management, 2017.
Abstract | Links | BibTeX | Tags:
@article{IMP2017,
  title     = {Self-Training on Refined Clause Patterns for Relation Extraction},
  author    = {Duc-Thuan Vo and Ebrahim Bagheri},
  journal   = {Information Processing and Management},
  year      = {2017},
  date      = {2017-01-01},
  doi       = {10.1016/j.ipm.2017.02.009},
  url       = {https://ls3.rnet.torontomu.ca/j-ipm-2017-02-009-1/},
  urldate   = {2017-01-01},
  abstract  = {Within the context of Information Extraction, relation extraction is oriented toward identifying a variety of relation phrases and their arguments in arbitrary sentences. In this paper, we present a clause-based framework for information extraction in textual documents. Our framework focuses on two important challenges in information extraction: 1) Open Information Extraction (OIE) and 2) Relation Extraction (RE). In the plethora of research that focus on the use of syntactic and dependency parsing for the purposes of detecting relations, there has been increasing evidence of incoherent and uninformative extractions. The extracted relations may even be erroneous at times and fail to provide a meaningful interpretation. In our work, we use the English clause structure and clause types in an effort to generate propositions that can be deemed as extractable relations. Moreover, we propose refinements to the grammatical structure of syntactic and dependency parsing that helps reduce the number of incoherent and uninformative extractions from clauses. In our experiments both in the open information extraction and relation extraction domains, we carefully evaluate our system on various benchmark datasets and compare the performance of our work against existing state-of-the-art information extraction systems. Our work shows improved performance compared to the state of the art techniques.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Within the context of Information Extraction, relation extraction is oriented toward identifying a variety of relation phrases and their arguments in arbitrary sentences. In this paper, we present a clause-based framework for information extraction in textual documents. Our framework focuses on two important challenges in information extraction: 1) Open Information Extraction (OIE) and 2) Relation Extraction (RE). In the plethora of research that focus on the use of syntactic and dependency parsing for the purposes of detecting relations, there has been increasing evidence of incoherent and uninformative extractions. The extracted relations may even be erroneous at times and fail to provide a meaningful interpretation. In our work, we use the English clause structure and clause types in an effort to generate propositions that can be deemed as extractable relations. Moreover, we propose refinements to the grammatical structure of syntactic and dependency parsing that helps reduce the number of incoherent and uninformative extractions from clauses. In our experiments both in the open information extraction and relation extraction domains, we carefully evaluate our system on various benchmark datasets and compare the performance of our work against existing state-of-the-art information extraction systems. Our work shows improved performance compared to the state of the art techniques.
Close
https://ls3.rnet.torontomu.ca/j-ipm-2017-02-009-1/
doi:10.1016/j.ipm.2017.02.009
Close
Jovanovic, Jelena; Bagheri, Ebrahim
Semantic Annotation in Biomedicine: The Current Landscape Journal Article
In: Journal of Biomedical Semantics, 2017.
Abstract | Links | BibTeX | Tags:
@article{jbsm2017,
  title     = {Semantic Annotation in Biomedicine: The Current Landscape},
  author    = {Jelena Jovanovic and Ebrahim Bagheri},
  journal   = {Journal of Biomedical Semantics},
  year      = {2017},
  date      = {2017-01-01},
  doi       = {10.1186/s13326-017-0153-x},
  url       = {https://ls3.rnet.torontomu.ca/s13326-017-0153-x/},
  urldate   = {2017-01-01},
  abstract  = {The abundance and unstructured nature of biomedical texts, be it clinical or research content, impose significant challenges for the effective and efficient use of information and knowledge stored in such texts. Annotation of biomedical documents with machine intelligible semantics facilitates the advanced, semantics-based text management, curation, indexing, and search. This paper focuses on annotation of biomedical entity mentions with concepts from relevant biomedical knowledge bases such as UMLS, so that the meaning of those mentions is unambiguously and explicitly defined, and thus made readily available for automated processing. This process is widely known as semantic annotation, and the tools that perform it are known as semantic annotators. Over the last dozen years, biomedical research community has invested significant efforts in the development of biomedical semantic annotation technology. Aiming to establish grounds for further developments in this area, we review a selected set of state of the art biomedical semantic annotators, focusing particularly on general purpose annotators, that is, semantic annotation tools that can be customized to work with texts from any area of biomedicine. We also examine potential directions for further improvements of today's annotators which could make them even more capable of meeting the needs of real-world applications. To motivate and encourage further developments in this area, along the suggested and/or related directions, we review existing and potential practical applications and benefits of semantic annotators.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
The abundance and unstructured nature of biomedical texts, be it clinical or research content, impose significant challenges for the effective and efficient use of information and knowledge stored in such texts. Annotation of biomedical documents with machine intelligible semantics facilitates the advanced, semantics-based text management, curation, indexing, and search. This paper focuses on annotation of biomedical entity mentions with concepts from relevant biomedical knowledge bases such as UMLS, so that the meaning of those mentions is unambiguously and explicitly defined, and thus made readily available for automated processing. This process is widely known as semantic annotation, and the tools that perform it are known as semantic annotators. Over the last dozen years, biomedical research community has invested significant efforts in the development of biomedical semantic annotation technology. Aiming to establish grounds for further developments in this area, we review a selected set of state of the art biomedical semantic annotators, focusing particularly on general purpose annotators, that is, semantic annotation tools that can be customized to work with texts from any area of biomedicine. We also examine potential directions for further improvements of today's annotators which could make them even more capable of meeting the needs of real-world applications. To motivate and encourage further developments in this area, along the suggested and/or related directions, we review existing and potential practical applications and benefits of semantic annotators.
Close
https://ls3.rnet.torontomu.ca/s13326-017-0153-x/
doi:10.1186/s13326-017-0153-x
Close
Feng, Yue; Bagheri, Ebrahim; Ensan, Faezeh; Jovanovic, Jelena
The State of the Art in Semantic Relatedness: A Framework for Comparison Journal Article
In: The Knowledge Engineering Review, 2017.
Abstract | Links | BibTeX | Tags:
@article{ker2017,
  title     = {The State of the Art in Semantic Relatedness: A Framework for Comparison},
  author    = {Yue Feng and Ebrahim Bagheri and Faezeh Ensan and Jelena Jovanovic},
  journal   = {The Knowledge Engineering Review},
  year      = {2017},
  date      = {2017-01-01},
  doi       = {10.1017/S0269888917000029},
  url       = {https://ls3.rnet.torontomu.ca/s0269888917000029/},
  urldate   = {2017-01-01},
  abstract  = {Semantic relatedness (SR) is a form of measurement that quantitatively identifies the relationship between two words or concepts based on the similarity or closeness of their meaning. In the recent years, there have been noteworthy efforts to compute semantic relatedness between pairs of words or concepts by exploiting various knowledge resources such as linguistically-structured (e.g., WordNet) and collaboratively-developed knowledge bases (e.g., Wikipedia), among others. The existing approaches rely on different methods for utilizing these knowledge resources, for instance, methods that depend on the path between two words, or a vector representation of the word descriptions. The purpose of this paper is to review and present the state of the art in semantic relatedness research through a hierarchical framework. The dimensions of the proposed framework cover three main aspects of semantic relatedness approaches including the resources they rely on, the computational methods applied on the resources for developing a relatedness metric, and the evaluation models that are used for measuring their effectiveness. We have selected fourteen representative SR approaches to be analyzed using our framework. We compare and critically review each of them through the dimensions of our framework, thus, identifying strengths and weaknesses of each approach. In addition, we provide guidelines for researchers and practitioners on how to select the most relevant SR method for their purpose. Finally, based on the comparative analysis of the reviewed relatedness measures, we identify existing challenges and potentially valuable future research directions in this domain.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Semantic relatedness (SR) is a form of measurement that quantitatively identifies the relationship between two words or concepts based on the similarity or closeness of their meaning. In the recent years, there have been noteworthy efforts to compute semantic relatedness between pairs of words or concepts by exploiting various knowledge resources such as linguistically-structured (e.g., WordNet) and collaboratively-developed knowledge bases (e.g., Wikipedia), among others. The existing approaches rely on different methods for utilizing these knowledge resources, for instance, methods that depend on the path between two words, or a vector representation of the word descriptions. The purpose of this paper is to review and present the state of the art in semantic relatedness research through a hierarchical framework. The dimensions of the proposed framework cover three main aspects of semantic relatedness approaches including the resources they rely on, the computational methods applied on the resources for developing a relatedness metric, and the evaluation models that are used for measuring their effectiveness. We have selected fourteen representative SR approaches to be analyzed using our framework. We compare and critically review each of them through the dimensions of our framework, thus, identifying strengths and weaknesses of each approach. In addition, we provide guidelines for researchers and practitioners on how to select the most relevant SR method for their purpose. Finally, based on the comparative analysis of the reviewed relatedness measures, we identify existing challenges and potentially valuable future research directions in this domain.
Close
https://ls3.rnet.torontomu.ca/s0269888917000029/
doi:10.1017/S0269888917000029
Close
Noorian, Mahdi; Bagheri, Ebrahim; Du, Weichang
Toward Automated Quality-centric Product Line Configuration using Intentional Variability Journal Article
In: Journal of Software: Evolution and Process, 2017.
Links | BibTeX | Tags:
@article{JSME2017,
  title     = {Toward Automated Quality-centric Product Line Configuration using Intentional Variability},
  author    = {Mahdi Noorian and Ebrahim Bagheri and Weichang Du},
  journal   = {Journal of Software: Evolution and Process},
  year      = {2017},
  date      = {2017-01-01},
  url       = {http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)2047-7481},
  abstract  = {Software Product Line Engineering (SPLE) is a discipline which facilitates a systematic reuse-based approach by formally representing commonalities and variabilities between the applications of a target domain. As one of the main artifact of the software product line, a feature model represents the possible configuration space and can be customized based on the stakeholders’ needs. Considering the complexity of the variabilities represented by feature models and the diversity of the stakeholders’ expectations, the configuration process can be viewed as a complex optimization problem. In previous research, researchers have bridged the gap between requirement and product line engineering by integrating feature models and goal models. In this paper, we propose an approach for the configuration process that seeks to satisfy the stakeholders’ requirements as well as the feature models’ structural and integrity constraints. We model stakeholders’ functional and non-functional needs and their preferences using requirement engineering goal models. We formalize the structure of the feature model, the stakeholders’ objectives, and their preferences in the form of an Integer Linear Program in order to conduct a semi automated feature model configuration process. Our experimental results show that the proposed configuration framework is scalable when considering both functional and non-functional requirements of stakeholders.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
a feature model represents the possible configuration space and can be customize[...]
Close
Pourmasoumi, Asef; Bagheri, Ebrahim
Business process mining Book Section
In: Encyclopedia with Semantic Computing and Robotic Intelligence, vol. 01, no. 01, pp. 1630004 [8], 2017.
Abstract | Links | BibTeX | Tags:
@incollection{escri/PourmasoumiB17,
  author    = {Asef Pourmasoumi and Ebrahim Bagheri},
  title     = {Business process mining},
  booktitle = {Encyclopedia with Semantic Computing and Robotic Intelligence},
  volume    = {01},
  number    = {01},
  pages     = {1630004 [8]},
  year      = {2017},
  date      = {2017-01-01},
  doi       = {10.1142/S2425038416300044},
  url       = {http://www.worldscientific.com/doi/abs/10.1142/S2425038416300044},
  abstract  = {One of the most valuable assets of an organization is its organizational data. The analysis and mining of this potential hidden treasure can lead to much added-value for the organization. Process mining is an emerging area that can be useful in helping organizations understand the status quo, check for compliance and plan for improving their processes. The aim of process mining is to extract knowledge from event logs of today’s organizational information systems. Process mining includes three main types: discovering process models from event logs, conformance checking and organizational mining. In this paper, we briefly introduce process mining and review some of its most important techniques. Also, we investigate some of the applications of process mining in industry and present some of the most important challenges that are faced in this area.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {incollection}
}

Close
One of the most valuable assets of an organization is its organizational data. The analysis and mining of this potential hidden treasure can lead to much added-value for the organization. Process mining is an emerging area that can be useful in helping organizations understand the status quo, check for compliance and plan for improving their processes. The aim of process mining is to extract knowledge from event logs of today’s organizational information systems. Process mining includes three main types: discovering process models from event logs, conformance checking and organizational mining. In this paper, we briefly introduce process mining and review some of its most important techniques. Also, we investigate some of the applications of process mining in industry and present some of the most important challenges that are faced in this area.
Close
http://www.worldscientific.com/doi/abs/10.1142/S2425038416300044
doi:10.1142/S2425038416300044
Close
Fani, Hossein; Bagheri, Ebrahim
Community detection in social networks Book Section
In: Encyclopedia with Semantic Computing and Robotic Intelligence, vol. 01, no. 01, pp. 1630001 [8], 2017.
Abstract | Links | BibTeX | Tags:
@incollection{escri/FaniB17,
  author    = {Hossein Fani and Ebrahim Bagheri},
  title     = {Community detection in social networks},
  booktitle = {Encyclopedia with Semantic Computing and Robotic Intelligence},
  volume    = {01},
  number    = {01},
  pages     = {1630001 [8]},
  year      = {2017},
  date      = {2017-01-01},
  doi       = {10.1142/S2425038416300019},
  url       = {http://www.worldscientific.com/doi/abs/10.1142/S2425038416300019},
  abstract  = {Online social networks have become a fundamental part of the global online experience. They facilitate different modes of communication and social interactions, enabling individuals to play social roles that they regularly undertake in real social settings. In spite of the heterogeneity of the users and interactions, these networks exhibit common properties. For instance, individuals tend to associate with others who share similar interests, a tendency often known as homophily, leading to the formation of communities. This entry aims to provide an overview of the definitions for an online community and review different community detection methods in social networks. Finding communities are beneficial since they provide summarization of network structure, highlighting the main properties of the network. Moreover, it has applications in sociology, biology, marketing and computer science which help scientists identify and extract actionable insight.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {incollection}
}

Close
Online social networks have become a fundamental part of the global online experience. They facilitate different modes of communication and social interactions, enabling individuals to play social roles that they regularly undertake in real social settings. In spite of the heterogeneity of the users and interactions, these networks exhibit common properties. For instance, individuals tend to associate with others who share similar interests, a tendency often known as homophily, leading to the formation of communities. This entry aims to provide an overview of the definitions for an online community and review different community detection methods in social networks. Finding communities are beneficial since they provide summarization of network structure, highlighting the main properties of the network. Moreover, it has applications in sociology, biology, marketing and computer science which help scientists identify and extract actionable insight.
Close
http://www.worldscientific.com/doi/abs/10.1142/S2425038416300019
doi:10.1142/S2425038416300019
Close
Zarrinkalam, Fattane; Bagheri, Ebrahim
Event identification in social networks Book Section
In: Encyclopedia with Semantic Computing and Robotic Intelligence, vol. 01, no. 01, pp. 1630002 [7], 2017.
Abstract | Links | BibTeX | Tags:
@incollection{escri/ZarrinkalamB17,
  author    = {Fattane Zarrinkalam and Ebrahim Bagheri},
  title     = {Event identification in social networks},
  booktitle = {Encyclopedia with Semantic Computing and Robotic Intelligence},
  volume    = {01},
  number    = {01},
  pages     = {1630002 [7]},
  year      = {2017},
  date      = {2017-01-01},
  urldate   = {2017-01-01},
  doi       = {10.1142/S2425038416300020},
  url       = {https://arxiv.org/pdf/1606.08521},
  abstract  = {Social networks enable users to freely communicate with each other and share their recent news, ongoing activities or views about different topics. As a result, they can be seen as a potentially viable source of information to understand the current emerging topics/events. The ability to model emerging topics is a substantial step to monitor and summarize the information originating from social sources. Applying traditional methods for event detection which are often proposed for processing large, formal and structured documents, are less effective, due to the short length, noisiness and informality of the social posts. Recent event detection techniques address these challenges by exploiting the opportunities behind abundant information available in social networks. This article provides an overview of the state of the art in event detection from social networks.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {incollection}
}

Close
Social networks enable users to freely communicate with each other and share their recent news, ongoing activities or views about different topics. As a result, they can be seen as a potentially viable source of information to understand the current emerging topics/events. The ability to model emerging topics is a substantial step to monitor and summarize the information originating from social sources. Applying traditional methods for event detection which are often proposed for processing large, formal and structured documents, are less effective, due to the short length, noisiness and informality of the social posts. Recent event detection techniques address these challenges by exploiting the opportunities behind abundant information available in social networks. This article provides an overview of the state of the art in event detection from social networks.
Close
https://arxiv.org/pdf/1606.08521
doi:10.1142/S2425038416300020
Close
Feng, Yue; Bagheri, Ebrahim
Methods and resources for computing semantic relatedness Book Section
In: Encyclopedia with Semantic Computing and Robotic Intelligence, vol. 01, no. 01, pp. 1630005 [5], 2017.
Abstract | Links | BibTeX | Tags:
@incollection{escri/FengB17,
  author    = {Yue Feng and Ebrahim Bagheri},
  title     = {Methods and resources for computing semantic relatedness},
  booktitle = {Encyclopedia with Semantic Computing and Robotic Intelligence},
  volume    = {01},
  number    = {01},
  pages     = {1630005 [5]},
  year      = {2017},
  date      = {2017-01-01},
  urldate   = {2017-01-01},
  doi       = {10.1142/S2425038416300056},
  url       = {http://community.worldfolios.com/public/Sample.pdf},
  abstract  = {Semantic relatedness (SR) is defined as a measurement that quantitatively identifies some form of lexical or functional association
between two words or concepts based on the contextual or semantic similarity of those two words regardless of their syntactical
differences. Section 1 of the entry outlines the working definition of SR and its applications and challenges. Section 2 identifies the
knowledge resources that are popular among SR methods. Section 3 reviews the primary measurements used to calculate SR.
Section 4 reviews the evaluation methodology which includes gold standard dataset and methods. Finally, Sec. 5 introduces further
reading.
In order to develop appropriate SR methods, there are three key aspects that need to be examined: (1) the knowledge resources
that are used as the source for extracting SR; (2) the methods that are used to quantify SR based on the adopted knowledge resource;
and (3) the datasets and methods that are used for evaluating SR techniques. The first aspect involves the selection of knowledge
bases such as WordNet or Wikipedia. Each knowledge base has its merits and downsides which can directly affect the accuracy and
the coverage of the SR method. The second aspect relies on different methods for utilizing the beforehand selected knowledge
resources, for example, methods that depend on the path between two words, or a vector representation of the word. As for the third
aspect, the evaluation for SR methods consists of two aspects, namely (1) the datasets that are used and (2) the various performance
measurement methods.
SR measures are increasingly applied in information retrieval to provide semantics between query and documents to reveal
relatedness between non-syntactically-related content. Researchers have already applied many different information and knowledge
sources in order to compute SR between two words. Empirical research has already shown that results of many of these SR
techniques have reasonable correlation with human subjects interpretation of relatedness between two words.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {incollection}
}

Close
Semantic relatedness (SR) is defined as a measurement that quantitatively identifies some form of lexical or functional association
between two words or concepts based on the contextual or semantic similarity of those two words regardless of their syntactical
differences. Section 1 of the entry outlines the working definition of SR and its applications and challenges. Section 2 identifies the
knowledge resources that are popular among SR methods. Section 3 reviews the primary measurements used to calculate SR.
Section 4 reviews the evaluation methodology which includes gold standard dataset and methods. Finally, Sec. 5 introduces further
reading.
In order to develop appropriate SR methods, there are three key aspects that need to be examined: (1) the knowledge resources
that are used as the source for extracting SR; (2) the methods that are used to quantify SR based on the adopted knowledge resource;
and (3) the datasets and methods that are used for evaluating SR techniques. The first aspect involves the selection of knowledge
bases such as WordNet or Wikipedia. Each knowledge base has its merits and downsides which can directly affect the accuracy and
the coverage of the SR method. The second aspect relies on different methods for utilizing the beforehand selected knowledge
resources, for example, methods that depend on the path between two words, or a vector representation of the word. As for the third
aspect, the evaluation for SR methods consists of two aspects, namely (1) the datasets that are used and (2) the various performance
measurement methods.
SR measures are increasingly applied in information retrieval to provide semantics between query and documents to reveal
relatedness between non-syntactically-related content. Researchers have already applied many different information and knowledge
sources in order to compute SR between two words. Empirical research has already shown that results of many of these SR
techniques have reasonable correlation with human subjects interpretation of relatedness between two words.
Close
http://community.worldfolios.com/public/Sample.pdf
doi:10.1142/S2425038416300056
Close
Vo, Duc-Thuan; Bagheri, Ebrahim
Open information extraction Book Section
In: Encyclopedia with Semantic Computing and Robotic Intelligence, vol. 01, no. 01, pp. 1630003 [6], 2017.
Abstract | Links | BibTeX | Tags:
@incollection{escri/ThuanE17,
  author    = {Duc-Thuan Vo and Ebrahim Bagheri},
  title     = {Open information extraction},
  booktitle = {Encyclopedia with Semantic Computing and Robotic Intelligence},
  volume    = {01},
  number    = {01},
  pages     = {1630003 [6]},
  year      = {2017},
  date      = {2017-01-01},
  doi       = {10.1142/S2425038416300032},
  url       = {http://www.worldscientific.com/doi/abs/10.1142/S2425038416300032},
  abstract  = {Open information extraction (Open IE) systems aim to obtain relation tuples with highly scalable extraction in portable across domain by identifying a variety of relation phrases and their arguments in arbitrary sentences. The first generation of Open IE learns linear chain models based on unlexicalized features such as Part-of-Speech (POS) or shallow tags to label the intermediate words between pair of potential arguments for identifying extractable relations. Open IE currently is developed in the second generation that is able to extract instances of the most frequently observed relation types such as Verb, Noun and Prep, Verb and Prep, and Infinitive with deep linguistic analysis. They expose simple yet principled ways in which verbs express relationships in linguistics such as verb phrase-based extraction or clause-based extraction. They obtain a significantly higher performance over previous systems in the first generation. In this paper, we describe an overview of two Open IE generations including strengths, weaknesses and application areas.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {incollection}
}

Close
Open information extraction (Open IE) systems aim to obtain relation tuples with highly scalable extraction in portable across domain by identifying a variety of relation phrases and their arguments in arbitrary sentences. The first generation of Open IE learns linear chain models based on unlexicalized features such as Part-of-Speech (POS) or shallow tags to label the intermediate words between pair of potential arguments for identifying extractable relations. Open IE currently is developed in the second generation that is able to extract instances of the most frequently observed relation types such as Verb, Noun and Prep, Verb and Prep, and Infinitive with deep linguistic analysis. They expose simple yet principled ways in which verbs express relationships in linguistics such as verb phrase-based extraction or clause-based extraction. They obtain a significantly higher performance over previous systems in the first generation. In this paper, we describe an overview of two Open IE generations including strengths, weaknesses and application areas.
Close
http://www.worldscientific.com/doi/abs/10.1142/S2425038416300032
doi:10.1142/S2425038416300032
Close
Ensan, Faezeh; Bagheri, Ebrahim; Zouaq, Amal; Kouznetsov, Alexandre
An Empirical Study of Embedding Features in Learning to Rank Proceedings Article
In: The 26th ACM International Conference on Information and Knowledge Management (CIKM), 2017.
Abstract | Links | BibTeX | Tags:
@inproceedings{cikm17-2,
  author    = {Faezeh Ensan and Ebrahim Bagheri and Amal Zouaq and Alexandre Kouznetsov},
  title     = {An Empirical Study of Embedding Features in Learning to Rank},
  booktitle = {The 26th ACM International Conference on Information and Knowledge Management (CIKM)},
  year      = {2017},
  date      = {2017-01-01},
  urldate   = {2017-01-01},
  doi       = {10.1145/3132847.3133138},
  url       = {https://ls3.rnet.torontomu.ca/3132847-3133138/},
  abstract  = {This paper explores the possibility of using neural embedding features for enhancing the effectiveness of ad hoc document ranking based on learning to rank models. We have extensively introduced and investigated the effectiveness of features learnt based on word and document embeddings to represent both queries and documents. We employ several learning to rank methods for document ranking using embedding-based features, keyword-based features as well as the interpolation of the embedding-based features with keyword-based features. The results show that embedding features have a synergistic impact on keyword based features and are able to provide statistically significant improvement on harder queries.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
This paper explores the possibility of using neural embedding features for enhancing the effectiveness of ad hoc document ranking based on learning to rank models. We have extensively introduced and investigated the effectiveness of features learnt based on word and document embeddings to represent both queries and documents. We employ several learning to rank methods for document ranking using embedding-based features, keyword-based features as well as the interpolation of the embedding-based features with keyword-based features. The results show that embedding features have a synergistic impact on keyword based features and are able to provide statistically significant improvement on harder queries.

Close
https://ls3.rnet.torontomu.ca/3132847-3133138/
doi:10.1145/3132847.3133138
Close
Ensan, Faezeh; Bagheri, Ebrahim
Document Retrieval through Semantic Entity Linking Proceedings Article
In: The Tenth International Conference on Web Search and Data Mining (WSDM’17), 2017.
Links | BibTeX | Tags:
@inproceedings{wsdm17,
  author    = {Faezeh Ensan and Ebrahim Bagheri},
  title     = {Document Retrieval through Semantic Entity Linking},
  booktitle = {The Tenth International Conference on Web Search and Data Mining (WSDM’17)},
  year      = {2017},
  date      = {2017-01-01},
  urldate   = {2017-01-01},
  doi       = {10.1145/3018661.3018692},
  url       = {https://lintool.github.io/robust04-analysis-papers/p181-ensan.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
https://lintool.github.io/robust04-analysis-papers/p181-ensan.pdf
doi:10.1145/3018661.3018692
Close
Falavarjani, Seyed Amin Mirlohi; Hosseini, Hawre; Noorian, Zeinab; Bagheri, Ebrahim
Estimating the Effect of Exercising on Users' Online Behavior Proceedings Article
In: International Workshop on Observational Studies Through Social Media (OSSM 2017) collocated with International AAAI Conference on Web and Social Media (ICWSM), 2017.
Abstract | Links | BibTeX | Tags:
@inproceedings{mirlohi-2017-1,
  author    = {Seyed Amin Mirlohi Falavarjani and Hawre Hosseini and Zeinab Noorian and Ebrahim Bagheri},
  title     = {Estimating the Effect of Exercising on Users' Online Behavior},
  booktitle = {International Workshop on Observational Studies Through Social Media (OSSM 2017) collocated with International AAAI Conference on Web and Social Media (ICWSM)},
  year      = {2017},
  date      = {2017-01-01},
  urldate   = {2017-01-01},
  doi       = {10.1609/icwsm.v11i1.14975},
  url       = {https://ls3.rnet.torontomu.ca/14975-article-text-18494-1-2-20201228/},
  abstract  = {This study aims to estimate the influence of offline activity on users’ online behavior, relying on a matching method to reduce the effect of confounding variables. We analyze activities of 850 users who are active on both Twitter and Foursquare social networks. Users’ offline activity is extracted from Foursquare posts and users’ online behavior is extracted from Twitter posts. Users’ interests, representing their online behavior, are extracted with regards to a set of topics in several subsequent time intervals. The shift of users’ interests across different time intervals is taken as a measure of user behavior change on the social network. On the other hand, we employ user check-ins at a gym or fitness center as a sign of exercise and consider it to be an offline activity. In order to find the effect of exercise on online behavior, we identify users who did not go to the gym for at least two months but did so at least nine times in the next three months. We show that shift in interest reduces significantly for users after they start exercising, which implies that the offline activity of exercising can influence how users’ interests are shaped and change on the social network over time.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
This study aims to estimate the influence of offline activity on users’ online behavior, relying on a matching method to reduce the effect of confounding variables. We analyze activities of 850 users who are active on both Twitter and Foursquare social networks. Users’ offline activity is extracted from Foursquare posts and users’ online behavior is extracted from Twitter posts. Users’ interests, representing their online behavior, are extracted with regards to a set of topics in several subsequent time intervals. The shift of users’ interests across different time intervals is taken as a measure of user behavior change on the social network. On the other hand, we employ user check-ins at a gym or fitness center as a sign of exercise and consider it to be an offline activity. In order to find the effect of exercise on online behavior, we identify users who did not go to the gym for at least two months but did so at least nine times in the next three months. We show that shift in interest reduces significantly for users after they start exercising, which implies that the offline activity of exercising can influence how users’ interests are shaped and change on the social network over time.

Close
https://ls3.rnet.torontomu.ca/14975-article-text-18494-1-2-20201228/
doi:10.1609/icwsm.v11i1.14975
Close
Nguyen, Tam T; Bagheri, Ebrahim
Learning Event Count Models with Application to Affiliation Ranking Proceedings Article
In: 27th Annual International Conference on Computer Science and Software Engineering (CASCON 2017), 2017.
Links | BibTeX | Tags:
@inproceedings{cascon2017,
  author    = {Tam T Nguyen and Ebrahim Bagheri},
  title     = {Learning Event Count Models with Application to Affiliation Ranking},
  booktitle = {27th Annual International Conference on Computer Science and Software Engineering (CASCON 2017)},
  year      = {2017},
  date      = {2017-01-01},
  url       = {https://www.ibm.com/ibm/cas/cascon/},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
https://www.ibm.com/ibm/cas/cascon/
Close
Nguyen, Tam T; Fernandez, Daniel; Nguyen, Quy T K; Bagheri, Ebrahim
Location-aware Human Activity Recognition Proceedings Article
In: The 13th International Conference on Advanced Data Mining and Applications (ADMA'17), 2017.
Abstract | Links | BibTeX | Tags:
@inproceedings{adma2017,
  author    = {Tam T Nguyen and Daniel Fernandez and Quy T K Nguyen and Ebrahim Bagheri},
  title     = {Location-aware Human Activity Recognition},
  booktitle = {The 13th International Conference on Advanced Data Mining and Applications (ADMA'17)},
  year      = {2017},
  date      = {2017-01-01},
  urldate   = {2017-01-01},
  doi       = {10.1007/978-3-319-69179-4_58},
  abstract  = {In this paper, we present one of the winning solutions of an international human activity recognition challenge organized by DrivenData in conjunction with the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases. The objective of the challenge was to predict activities of daily living and posture or ambulation based on wrist-worn accelerometer, RGB-D camera, and passive environmental sensor data, which was collected from a smart home in the UK. Most of the state of the art research focus on one type of data, e.g., wearable sensor data, for making predictions and overlook the usefulness of user locations for this purpose. In our work, we propose a novel approach that leverages heterogeneous data types as well as user locations for building predictive models. Note that while we do not have actual location information but we build models to predict location using machine learning models and use the predictions in user activity recognition. Compared to the state of the art, our proposed approach is able to achieve a 38% improvement with a Brier score of 0.1346. This means that roughly 9 out of 10 predictions matched the human-labeled descriptions.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
In this paper, we present one of the winning solutions of an international human activity recognition challenge organized by DrivenData in conjunction with the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases. The objective of the challenge was to predict activities of daily living and posture or ambulation based on wrist-worn accelerometer, RGB-D camera, and passive environmental sensor data, which was collected from a smart home in the UK. Most of the state of the art research focus on one type of data, e.g., wearable sensor data, for making predictions and overlook the usefulness of user locations for this purpose. In our work, we propose a novel approach that leverages heterogeneous data types as well as user locations for building predictive models. Note that while we do not have actual location information but we build models to predict location using machine learning models and use the predictions in user activity recognition. Compared to the state of the art, our proposed approach is able to achieve a 38% improvement with a Brier score of 0.1346. This means that roughly 9 out of 10 predictions matched the human-labeled descriptions.
Close
doi:10.1007/978-3-319-69179-4_58
Close
Vo, Duc-Thuan; Bagheri, Ebrahim
Matrix Models with Feature Enrichment for Relation Extraction Proceedings Article
In: Advances in Artificial Intelligence - 30th Canadian Conference on Artificial Intelligence, Canadian AI 2017, Edmonton, AB, Canada, May 16 - May 19, 2017. Proceedings, 2017.
Links | BibTeX | Tags:
@inproceedings{canadianAI17,
  author    = {Duc-Thuan Vo and Ebrahim Bagheri},
  title     = {Matrix Models with Feature Enrichment for Relation Extraction},
  booktitle = {Advances in Artificial Intelligence - 30th Canadian Conference on Artificial Intelligence, Canadian AI 2017, Edmonton, AB, Canada, May 16 - May 19, 2017. Proceedings},
  year      = {2017},
  date      = {2017-01-01},
  urldate   = {2017-01-01},
  doi       = {10.1007/978-3-319-57351-9_28},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
doi:10.1007/978-3-319-57351-9_28
Close
Zarrinkalam, Fattane; Fani, Hossein; Bagheri, Ebrahim; Kahani, Mohsen
Predicting Users’ Future Interests on Twitter Proceedings Article
In: Advances in Information Retrieval: 39th European Conference on IR Research, ECIR 2017, Aberdeen, UK, April 8-13, 2017, Proceedings, pp. 464–476, 2017.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/ecir/ZarrinkalamFBK17,
  author    = {Fattane Zarrinkalam and Hossein Fani and Ebrahim Bagheri and Mohsen Kahani},
  title     = {Predicting Users’ Future Interests on Twitter},
  booktitle = {Advances in Information Retrieval: 39th European Conference on IR Research, ECIR 2017, Aberdeen, UK, April 8-13, 2017, Proceedings},
  pages     = {464--476},
  year      = {2017},
  date      = {2017-01-01},
  urldate   = {2017-01-01},
  crossref  = {DBLP:conf/ecir/2017},
  doi       = {10.1007/978-3-319-56608-5_36},
  url       = {https://hosseinfani.github.io/res/papers/Predicting%20Users%20Future%20Interests%20on%20Twitter.pdf},
  abstract  = {In this paper, we address the problem of predicting future interests of users with regards to a set of unobserved topics in microblogging services which enables forward planning based on potential future interests. Existing works in the literature that operate based on a known interest space cannot be directly applied to solve this problem. Such methods require at least a minimum user interaction with the topic to perform prediction. To tackle this problem, we integrate the semantic information derived from the Wikipedia category structure and the temporal evolution of user’s interests into our prediction model. More specifically, to capture the temporal behaviour of the topics and user’s interests, we consider discrete intervals and build user’s topic profile in each time interval separately. Then, we generalize users’ interests that have been observed over several time intervals by transferring them over the Wikipedia category structure. Our approach not only allows us to generalize users’ interests but also enables us to transfer users’ interests across different time intervals that do not necessarily have the same set of topics. Our experiments illustrate the superiority of our model compared to the state of the art.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
In this paper, we address the problem of predicting future interests of users with regards to a set of unobserved topics in microblogging services which enables forward planning based on potential future interests. Existing works in the literature that operate based on a known interest space cannot be directly applied to solve this problem. Such methods require at least a minimum user interaction with the topic to perform prediction. To tackle this problem, we integrate the semantic information derived from the Wikipedia category structure and the temporal evolution of user’s interests into our prediction model. More specifically, to capture the temporal behaviour of the topics and user’s interests, we consider discrete intervals and build user’s topic profile in each time interval separately. Then, we generalize users’ interests that have been observed over several time intervals by transferring them over the Wikipedia category structure. Our approach not only allows us to generalize users’ interests but also enables us to transfer users’ interests across different time intervals that do not necessarily have the same set of topics. Our experiments illustrate the superiority of our model compared to the state of the art.
Close
https://hosseinfani.github.io/res/papers/Predicting%20Users%20Future%20Interests[...]
doi:10.1007/978-3-319-56608-5_36
Close
Bashari, Mahdi; Bagheri, Ebrahim; Du, Weichang
Self-healing in Service Mashups Through Feature Adaptation Proceedings Article
In: 21st International Systems and Software Product Line Conference (SPLC 2017), 2017.
Abstract | Links | BibTeX | Tags:
@inproceedings{Bashari2017,
  author    = {Mahdi Bashari and Ebrahim Bagheri and Weichang Du},
  title     = {Self-healing in Service Mashups Through Feature Adaptation},
  booktitle = {21st International Systems and Software Product Line Conference (SPLC 2017)},
  year      = {2017},
  date      = {2017-01-01},
  urldate   = {2017-01-01},
  doi       = {10.1145/3106195.3106215},
  url       = {https://ls3.rnet.torontomu.ca/3106195-3106215/},
  abstract  = {The composition of the functionality of multiple services into a single unique service mashup has received wide interest in the recent years. Given the distributed nature of these mashups where the constituent services can be located on different servers, it is possible that a change in the functionality or availability of a constituent service result in the failure of the service mashup. In this paper, we propose a novel method based on the Software Product Line Engineering (SPLE) paradigm which is able to find an alternate valid service mashup which has maximum possible number of original service mashup features in order to mitigate a service failure when complete recovery is not possible. This method also has an advantage that it can recover or mitigate the failure automatically without requiring the user to specify any adaptation rule or strategy. We show the practicality of our proposed approach through extensive experiments.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
The composition of the functionality of multiple services into a single unique service mashup has received wide interest in the recent years. Given the distributed nature of these mashups where the constituent services can be located on different servers, it is possible that a change in the functionality or availability of a constituent service result in the failure of the service mashup. In this paper, we propose a novel method based on the Software Product Line Engineering (SPLE) paradigm which is able to find an alternate valid service mashup which has maximum possible number of original service mashup features in order to mitigate a service failure when complete recovery is not possible. This method also has an advantage that it can recover or mitigate the failure automatically without requiring the user to specify any adaptation rule or strategy. We show the practicality of our proposed approach through extensive experiments.
Close
https://ls3.rnet.torontomu.ca/3106195-3106215/
doi:10.1145/3106195.3106215
Close
Fani, Hossein; Bagheri, Ebrahim; Du, Weichang
Temporally Like-minded User Community Identification through Neural Embeddings Proceedings Article
In: The 26th ACM International Conference on Information and Knowledge Management (CIKM), 2017.
Abstract | Links | BibTeX | Tags:
@inproceedings{cikm17-1,
  author    = {Hossein Fani and Ebrahim Bagheri and Weichang Du},
  title     = {Temporally Like-minded User Community Identification through Neural Embeddings},
  booktitle = {The 26th ACM International Conference on Information and Knowledge Management (CIKM)},
  year      = {2017},
  date      = {2017-01-01},
  urldate   = {2017-01-01},
  doi       = {10.1145/3132847.3132955},
  url       = {https://hosseinfani.github.io/res/papers/Temporally%20Like-minded%20User%20Community%20Identification%20through%20Neural%20Embeddings.pdf},
  abstract  = {We propose a neural embedding approach to identify temporally like-minded user communities, i.e., those communities of users who have similar temporal alignment in their topics of interest. Like-minded user communities in social networks are usually identified by either considering explicit structural connections between users (link analysis), users' topics of interest expressed in their posted contents (content analysis), or in tandem. In such communities, however, the users' rich temporal behavior towards topics of interest is overlooked. Only few recent research efforts consider the time dimension and define like-minded user communities as groups of users who share not only similar topical interests but also similar temporal behavior. Temporal like-minded user communities find application in areas such as recommender systems where relevant items are recommended to the users at the right time. In this paper, we tackle the problem of identifying temporally like-minded user communities by leveraging unsupervised feature learning (embeddings). Specifically, we learn a mapping from the user space to a low-dimensional vector space of features that incorporate both topics of interest and their temporal nature. We demonstrate the efficacy of our proposed approach on a Twitter dataset in the context of three applications: news recommendation, user prediction and community selection, where our work is able to outperform the state-of-the-art on important information retrieval metrics.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
We propose a neural embedding approach to identify temporally like-minded user communities, i.e., those communities of users who have similar temporal alignment in their topics of interest. Like-minded user communities in social networks are usually identified by either considering explicit structural connections between users (link analysis), users' topics of interest expressed in their posted contents (content analysis), or in tandem. In such communities, however, the users' rich temporal behavior towards topics of interest is overlooked. Only few recent research efforts consider the time dimension and define like-minded user communities as groups of users who share not only similar topical interests but also similar temporal behavior. Temporal like-minded user communities find application in areas such as recommender systems where relevant items are recommended to the users at the right time. In this paper, we tackle the problem of identifying temporally like-minded user communities by leveraging unsupervised feature learning (embeddings). Specifically, we learn a mapping from the user space to a low-dimensional vector space of features that incorporate both topics of interest and their temporal nature. We demonstrate the efficacy of our proposed approach on a Twitter dataset in the context of three applications: news recommendation, user prediction and community selection, where our work is able to outperform the state-of-the-art on important information retrieval metrics.

Close
https://hosseinfani.github.io/res/papers/Temporally%20Like-minded%20User%20Commu[...]
doi:10.1145/3132847.3132955
Close
2016
Lashkari, Fatemeh; Ensan, Faezeh; Bagheri, Ebrahim; Ghorbani, Ali A
Efficient Indexing for Semantic Search Journal Article
In: Expert Systems With Applications, 2016.
Abstract | Links | BibTeX | Tags:
@article{eswa2016,
  author     = {Fatemeh Lashkari and Faezeh Ensan and Ebrahim Bagheri and Ali A Ghorbani},
  title      = {Efficient Indexing for Semantic Search},
  journal    = {Expert Systems With Applications},
  year       = {2016},
  date       = {2016-01-01},
  url        = {http://www.journals.elsevier.com/expert-systems-with-applications/},
  abstract   = {The increasing performance and wider spread use of automated semantic annotation and entity linking platforms has empowered the possibility of using semantic information in information retrieval. While keyword-based information retrieval techniques have shown impressive performance, the addition of semantic information can increase retrieval performance by allowing for more accurate sense disambiguation, intent determination, and instance identification, just to name a few. Researchers have already delved into the possibility of integrating semantic information into practical search engines using a combination of techniques such as using graph databases, hybrid indices and adapted inverted indices, among others. One of the challenges with the efficient design of a search engine capable of considering semantic information is that it would need to be able to index information beyond the traditional information stored in inverted indices, including entity mentions and type relationships. The objective of our work in
this paper is to investigate various ways in which different data structure types can be adopted to integrate three types of information including keywords, entities and types. We will systematically compare the performance of the different data structures for scenarios where i) the same data structure types are adopted for the three types of information, and ii) different data structure types are integrated for storing and retrieving the three different information types. We report our findings in terms of the performance of various query processing tasks such as Boolean and ranked intersection for the different indices and discuss which index type would be appropriate under different conditions for semantic search.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}

Close
The increasing performance and wider spread use of automated semantic annotation and entity linking platforms has empowered the possibility of using semantic information in information retrieval. While keyword-based information retrieval techniques have shown impressive performance, the addition of semantic information can increase retrieval performance by allowing for more accurate sense disambiguation, intent determination, and instance identification, just to name a few. Researchers have already delved into the possibility of integrating semantic information into practical search engines using a combination of techniques such as using graph databases, hybrid indices and adapted inverted indices, among others. One of the challenges with the efficient design of a search engine capable of considering semantic information is that it would need to be able to index information beyond the traditional information stored in inverted indices, including entity mentions and type relationships. The objective of our work in
this paper is to investigate various ways in which different data structure types can be adopted to integrate three types of information including keywords, entities and types. We will systematically compare the performance of the different data structures for scenarios where i) the same data structure types are adopted for the three types of information, and ii) different data structure types are integrated for storing and retrieving the three different information types. We report our findings in terms of the performance of various query processing tasks such as Boolean and ranked intersection for the different indices and discuss which index type would be appropriate under different conditions for semantic search.
Close
http://www.journals.elsevier.com/expert-systems-with-applications/
Close
Pourmasoumi, Asef; Kahani, Mohsen; Bagheri, Ebrahim
Mining Variable Fragments from Process Event Logs Journal Article
In: Information Systems Frontiers, 2016.
Abstract | Links | BibTeX | Tags:
@article{isfi2016,
title = {Mining Variable Fragments from Process Event Logs},
author = {Asef Pourmasoumi and Mohsen Kahani and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/s10796-016-9662-x-1/},
doi = {10.1007/s10796-016-9662-x},
year = {2016},
date = {2016-01-01},
urldate = {2016-01-01},
journal = {Information Systems Frontiers},
abstract = {Many peer-organizations are now using process-aware information systems for
managing their organizational processes. Most of these peer-organizations have
shared processes, which include many commonalities and some degrees of variability.
Analyzing and mining the commonalities of these processes can have many benefits
from the reusability point of view. In this paper, we propose an approach for extracting
common process fragments from a collection of event logs. To this end, we first
analyze the process fragment literature from a theoretical point of view, based on
which we present a new process fragment definition, called morphological fragments to
support composability and flexibility. Then we propose a novel algorithm for extracting
such morphological fragments directly from process event logs. This algorithm is
capable of eliciting common fragments from a family of processes that may not have
been executed within the same application/organization. We also propose supporting
algorithms for detecting and categorizing morphological fragments for the purpose of
reusability. Our empirical studies show that our approach is able to support reusability
and flexibility in process fragment identification.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Many peer-organizations are now using process-aware information systems for
managing their organizational processes. Most of these peer-organizations have
shared processes, which include many commonalities and some degrees of variability.
Analyzing and mining the commonalities of these processes can have many benefits
from the reusability point of view. In this paper, we propose an approach for extracting
common process fragments from a collection of event logs. To this end, we first
analyze the process fragment literature from a theoretical point of view, based on
which we present a new process fragment definition, called morphological fragments to
support composability and flexibility. Then we propose a novel algorithm for extracting
such morphological fragments directly from process event logs. This algorithm is
capable of eliciting common fragments from a family of processes that may not have
been executed within the same application/organization. We also propose supporting
algorithms for detecting and categorizing morphological fragments for the purpose of
reusability. Our empirical studies show that our approach is able to support reusability
and flexibility in process fragment identification.
Close
https://ls3.rnet.torontomu.ca/s10796-016-9662-x-1/
doi:https://doi.org/10.1007/s10796-016-9662-x
Close
Poots, Kent; Bagheri, Ebrahim
Overview of Text Annotation with Pictures Journal Article
In: IEEE IT Professional, 2016.
Abstract | Links | BibTeX | Tags:
@article{itpro2016,
title = {Overview of Text Annotation with Pictures},
author = {Kent Poots and Ebrahim Bagheri},
url = {http://www.computer.org/web/computingnow/itpro},
year = {2016},
date = {2016-01-01},
journal = {IEEE IT Professional},
abstract = {The vast array of information available on the Web makes it a challenge for readers to quickly browse through and decide about the importance and relevance of content. Interpreting large-volumes of data is particularly demanding for users with handheld devices in the social media and micro-blogging sphere. Various approaches address this challenge through text summarization, content ranking and personalized recommendation. We describe a family of techniques that help users understand text by automatically annotating text with pictures, referred to as text picturing. The objective is to find a set of pictures that cover the main concepts in a textual snippet. We provide an overview of text picturing, its constituent steps such as knowledge extraction, mapping, scene rendering, as well as application areas. We give a picturing-related literature overview, and list use-cases that offer IT professionals insight into how picturing techniques can be successfully incorporated into real world applications.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
The vast array of information available on the Web makes it a challenge for readers to quickly browse through and decide about the importance and relevance of content. Interpreting large-volumes of data is particularly demanding for users with handheld devices in the social media and micro-blogging sphere. Various approaches address this challenge through text summarization, content ranking and personalized recommendation. We describe a family of techniques that help users understand text by automatically annotating text with pictures, referred to as text picturing. The objective is to find a set of pictures that cover the main concepts in a textual snippet. We provide an overview of text picturing, its constituent steps such as knowledge extraction, mapping, scene rendering, as well as application areas. We give a picturing-related literature overview, and list use-cases that offer IT professionals insight into how picturing techniques can be successfully incorporated into real world applications.
Close
http://www.computer.org/web/computingnow/itpro
Close
Ghashghaei, Mehrnaz; Bagheri, Ebrahim; Cuzzola, John; Ghorbani, Ali A; Noorian, Zeinab
Semantic Disambiguation and Linking of Quantitative Mentions in Textual Content Journal Article
In: Int. J. Semantic Computing, vol. 10, no. 1, pp. 121, 2016.
Abstract | Links | BibTeX | Tags:
@article{DBLP:journals/ijsc/GhashghaeiBCGN16,
title = {Semantic Disambiguation and Linking of Quantitative Mentions in Textual Content},
author = {Mehrnaz Ghashghaei and Ebrahim Bagheri and John Cuzzola and Ali A Ghorbani and Zeinab Noorian},
url = {https://ls3.rnet.torontomu.ca/de2f932b65a6afa313ab889104791cca39f7/},
doi = {10.1142/S1793351X16500021},
year = {2016},
date = {2016-01-01},
urldate = {2016-01-01},
journal = {Int. J. Semantic Computing},
volume = {10},
number = {1},
pages = {121},
abstract = {Semantic annotation techniques provide the basis for linking textual content with concepts in well grounded knowledge bases. In spite of their many application areas, current semantic annotation systems have some limitations. One of the prominent limitations of such systems is that none of the existing semantic annotator systems are able to identify and disambiguate quantitative (numerical) content. In textual documents such as Web pages, specially technical contents, there are many quantitative information such as product specifications that need to be semantically qualified. In this paper, we propose an approach for annotating quantitative values in short textual content. In our approach, we identify numeric values in the text and link them to an existing property in a knowledge base. Based on this mapping, we are then able to find the concept that the property is associated with, whereby identifying both the concept and the specific property of that concept that the numeric value belongs to. Results obtained from the developed gold standard dataset show that the proposed automated semantic annotation platform is quite effective in detecting and disambiguating numerical content, and connecting them to associated properties on the external knowledge base. Our experiments show that our proposed approach is able to reach an accuracy of over 70% for semantically annotating quantitative content.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Semantic annotation techniques provide the basis for linking textual content with concepts in well grounded knowledge bases. In spite of their many application areas, current semantic annotation systems have some limitations. One of the prominent limitations of such systems is that none of the existing semantic annotator systems are able to identify and disambiguate quantitative (numerical) content. In textual documents such as Web pages, specially technical contents, there are many quantitative information such as product specifications that need to be semantically qualified. In this paper, we propose an approach for annotating quantitative values in short textual content. In our approach, we identify numeric values in the text and link them to an existing property in a knowledge base. Based on this mapping, we are then able to find the concept that the property is associated with, whereby identifying both the concept and the specific property of that concept that the numeric value belongs to. Results obtained from the developed gold standard dataset show that the proposed automated semantic annotation platform is quite effective in detecting and disambiguating numerical content, and connecting them to associated properties on the external knowledge base. Our experiments show that our proposed approach is able to reach an accuracy of over 70% for semantically annotating quantitative content.
Close
https://ls3.rnet.torontomu.ca/de2f932b65a6afa313ab889104791cca39f7/
doi:http://dx.doi.org/10.1142/S1793351X16500021
Close
Bagheri, Ebrahim; Ensan, Faezeh
Semantic tagging and linking of software engineering social content Journal Article
In: Autom. Softw. Eng., vol. 23, no. 2, pp. 147–190, 2016.
Links | BibTeX | Tags:
@article{DBLP:journals/ase/BagheriE16,
title = {Semantic tagging and linking of software engineering social content},
author = {Ebrahim Bagheri and Faezeh Ensan},
url = {https://ls3.rnet.torontomu.ca/s10515-014-0146-2/},
doi = {10.1007/s10515-014-0146-2},
year = {2016},
date = {2016-01-01},
urldate = {2016-01-01},
journal = {Autom. Softw. Eng.},
volume = {23},
number = {2},
pages = {147--190},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
https://ls3.rnet.torontomu.ca/s10515-014-0146-2/
doi: https://doi.org/10.1007/s10515-014-0146-2
Close
Bashari, Mahdi; Bagheri, Ebrahim; Du, Weichang
Automated Composition of Service Mashups Through Software Product Line Engineering Proceedings Article
In: The 15th International Conference on Software Reuse (ICSR 2016), 2016.
Abstract | Links | BibTeX | Tags:
@inproceedings{Bashari20162,
title = {Automated Composition of Service Mashups Through Software Product Line Engineering},
author = {Mahdi Bashari and Ebrahim Bagheri and Weichang Du},
url = {https://ls3.rnet.torontomu.ca/bashari2016/},
doi = {10.1007/978-3-319-35122-3_2},
year = {2016},
date = {2016-01-01},
urldate = {2016-01-01},
booktitle = {The 15th International Conference on Software Reuse (ICSR 2016)},
abstract = {The growing number of online resources, including data and services, has motivated both researchers and practitioners to provide methods and tools for non-expert end-users to create desirable applications by putting these resources together leading to the so called mashups. In this paper, we focus on a class of mashups referred to as service mashups. A service mashup is built from existing services such that the developed service mashup offers added-value through new functionalities. We propose an approach which adopts concepts from software product line engineering and automated AI planning to support the automated composition of service mashups. One of the advantages of our work is that it allows non-experts to build and optimize desired mashups with little knowledge of service composition. We report on the results of the experimentation that we have performed which support the practicality and scalability of our proposed work.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
The growing number of online resources, including data and services, has motivated both researchers and practitioners to provide methods and tools for non-expert end-users to create desirable applications by putting these resources together leading to the so called mashups. In this paper, we focus on a class of mashups referred to as service mashups. A service mashup is built from existing services such that the developed service mashup offers added-value through new functionalities. We propose an approach which adopts concepts from software product line engineering and automated AI planning to support the automated composition of service mashups. One of the advantages of our work is that it allows non-experts to build and optimize desired mashups with little knowledge of service composition. We report on the results of the experimentation that we have performed which support the practicality and scalability of our proposed work.
Close
https://ls3.rnet.torontomu.ca/bashari2016/
doi:https://doi.org/10.1007/978-3-319-35122-3_2
Close
Thuan, Vo-Duc; Bagheri, Ebrahim
Clause-based Open Information Extraction with Grammatical Structure Reformation Proceedings Article
In: 17th International Conference on Intelligent Text Processing and Computational Linguistics (CICLing'16), 2016.
Links | BibTeX | Tags:
@inproceedings{thuan2016-2,
  author     = {Vo-Duc Thuan and Ebrahim Bagheri},
  title      = {Clause-based Open Information Extraction with Grammatical Structure Reformation},
  booktitle  = {17th International Conference on Intelligent Text Processing and Computational Linguistics (CICLing'16)},
  year       = {2016},
  date       = {2016-01-01},
  url        = {http://www.cicling.org/2016/},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}

Close
http://www.cicling.org/2016/
Close
Pourmasoumi, Asef; Bagheri, Ebrahim; Kahani, Mohsen
Composing Optimal Execution Traces from Event Logs Proceedings Article
In: 17th International Conference on Business Process Modeling, Development, and Support (BPMDS’16), 2016.
Links | BibTeX | Tags:
@inproceedings{bpmds2016,
title = {Composing Optimal Execution Traces from Event Logs},
author = {Asef Pourmasoumi and Ebrahim Bagheri and Mohsen Kahani},
url = {http://www.bpmds.org/},
year = {2016},
date = {2016-01-01},
booktitle = {17th International Conference on Business Process Modeling, Development, and Support (BPMDS’16)},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
http://www.bpmds.org/
Close
Zarrinkalam, Fattane; Fani, Hossein; Bagheri, Ebrahim; Kahani, Mohsen
Inferring Implicit Topical Interests on Twitter Proceedings Article
In: Advances in Information Retrieval - 38th European Conference on IR Research, ECIR 2016, Padua, Italy, March 20-23, 2016. Proceedings, pp. 479–491, 2016.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/ecir/ZarrinkalamFBK16,
  author     = {Fattane Zarrinkalam and Hossein Fani and Ebrahim Bagheri and Mohsen Kahani},
  title      = {Inferring Implicit Topical Interests on Twitter},
  booktitle  = {Advances in Information Retrieval - 38th European Conference on IR
Research, ECIR 2016, Padua, Italy, March 20-23, 2016. Proceedings},
  pages      = {479--491},
  year       = {2016},
  date       = {2016-01-01},
  crossref   = {DBLP:conf/ecir/2016},
  doi        = {10.1007/978-3-319-30671-1_35},
  url        = {https://hosseinfani.github.io/res/papers/Inferring%20Implicit%20Topical%20Interests%20on%20Twitter.pdf},
  urldate    = {2016-01-01},
  abstract   = {Inferring user interests from their activities in the social network space has been an emerging research topic in the recent years. While much work is done towards detecting explicit interests of the users from their social posts, less work is dedicated to identifying implicit interests, which are also very important for building an accurate user model. In this paper, a graph based link prediction schema is proposed to infer implicit interests of the users towards emerging topics on Twitter. The underlying graph of our proposed work uses three types of information: user’s followerships, user’s explicit interests towards the topics, and the relatedness of the topics. To investigate the impact of each type of information on the accuracy of inferring user implicit interests, different variants of the underlying representation model are investigated along with several link prediction strategies in order to infer implicit interests. Our experimental results demonstrate that using topics relatedness information, especially when determined through semantic similarity measures, has considerable impact on improving the accuracy of user implicit interest prediction, compared to when followership information is only used.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}

Close
Inferring user interests from their activities in the social network space has been an emerging research topic in the recent years. While much work is done towards detecting explicit interests of the users from their social posts, less work is dedicated to identifying implicit interests, which are also very important for building an accurate user model. In this paper, a graph based link prediction schema is proposed to infer implicit interests of the users towards emerging topics on Twitter. The underlying graph of our proposed work uses three types of information: user’s followerships, user’s explicit interests towards the topics, and the relatedness of the topics. To investigate the impact of each type of information on the accuracy of inferring user implicit interests, different variants of the underlying representation model are investigated along with several link prediction strategies in order to infer implicit interests. Our experimental results demonstrate that using topics relatedness information, especially when determined through semantic similarity measures, has considerable impact on improving the accuracy of user implicit interest prediction, compared to when followership information is only used.
Close
https://hosseinfani.github.io/res/papers/Inferring%20Implicit%20Topical%20Intere[...]
doi:10.1007/978-3-319-30671-1_35
Close
Noorian, Mahdi; Bagheri, Ebrahim; Du, Weichang
Quality-centric Feature Model Configuration using Goal Models Proceedings Article
In: The ACM/SIGAPP Symposium on Applied Computing (SAC), 2016.
Abstract | Links | BibTeX | Tags:
@inproceedings{SAC16,
title = {Quality-centric Feature Model Configuration using Goal Models},
author = {Mahdi Noorian and Ebrahim Bagheri and Weichang Du},
url = {https://ls3.rnet.torontomu.ca/2851613-2851959/},
doi = {10.1145/2851613.2851959},
year = {2016},
date = {2016-01-01},
urldate = {2016-01-01},
booktitle = {The ACM/SIGAPP Symposium on Applied Computing (SAC)},
abstract = {In software product line engineering, a feature model represents the possible configuration space and can be customized based on the stakeholders' needs. Considering the complexity of feature models in addition to the diversity of the stakeholders' expectations, the configuration process is viewed as a complex optimization problem. In this paper, we propose a holistic approach for the configuration process that seeks to satisfy the stakeholders' requirements as well as the feature models' structural and integrity constraints. Here, we model stakeholders' functional and non-functional needs and their preferences using requirement engineering goal models. We formalize the structure of the feature model, the stakeholders' objectives, and their preferences in the form of an integer linear program to automatically perform feature selection.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
In software product line engineering, a feature model represents the possible configuration space and can be customized based on the stakeholders' needs. Considering the complexity of feature models in addition to the diversity of the stakeholders' expectations, the configuration process is viewed as a complex optimization problem. In this paper, we propose a holistic approach for the configuration process that seeks to satisfy the stakeholders' requirements as well as the feature models' structural and integrity constraints. Here, we model stakeholders' functional and non-functional needs and their preferences using requirement engineering goal models. We formalize the structure of the feature model, the stakeholders' objectives, and their preferences in the form of an integer linear program to automatically perform feature selection.
Close
https://ls3.rnet.torontomu.ca/2851613-2851959/
doi:https://doi.org/10.1145/2851613.2851959
Close
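Bagheri, Ebrahim; Keikha, Andisheh; Ensan, Faezeh
Query Expansion Using Pseudo Relevance Feedback on Wikipedia Proceedings Article
In: International Workshop on Query Understanding and Reformulation for Mobile and Web Search collocated with The 9th ACM International Conference on Web Search and Data Mining (WSDM 2016), 2016.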
Abstract | Links | BibTeX | Tags:
@inproceedings{QRUMS’16,
title = {Query Expansion Using Pseudo Relevance Feedback on Wikipedia},
author = {Ebrahim Bagheri and Andisheh Keikha and Faezeh Ensan},
url = {https://ls3.rnet.torontomu.ca/s10844-017-0466-3/},
doi = {10.1007/s10844-017-0466-3},
year = {2016},
date = {2016-01-01},
urldate = {2016-01-01},
booktitle = {International Workshop on Query Understanding and Reformulation for Mobile and Web Search collocated with The 9th ACM International Conference on Web Search and Data Mining (WSDM 2016)},
abstract = {One of the major challenges in Web search pertains to the correct interpretation of users’ intent. Query Expansion is one of the well-known approaches for determining the intent of the user by addressing the vocabulary mismatch problem. A limitation of the current query expansion approaches is that the relations between the query terms and the expanded terms is limited. In this paper, we capture users’ intent through query expansion. We build on earlier work in the area by adopting a pseudo-relevance feedback approach; however, we advance the state of the art by proposing an approach for feature learning within the process of query expansion. In our work, we specifically consider the Wikipedia corpus as the feedback collection space and identify the best features within this context for term selection in two supervised and unsupervised models. We compare our work with state of the art query expansion techniques, the results of which show promising robustness and improved precision.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
One of the major challenges in Web search pertains to the correct interpretation of users’ intent. Query Expansion is one of the well-known approaches for determining the intent of the user by addressing the vocabulary mismatch problem. A limitation of the current query expansion approaches is that the relations between the query terms and the expanded terms is limited. In this paper, we capture users’ intent through query expansion. We build on earlier work in the area by adopting a pseudo-relevance feedback approach; however, we advance the state of the art by proposing an approach for feature learning within the process of query expansion. In our work, we specifically consider the Wikipedia corpus as the feedback collection space and identify the best features within this context for term selection in two supervised and unsupervised models. We compare our work with state of the art query expansion techniques, the results of which show promising robustness and improved precision.

Close
https://ls3.rnet.torontomu.ca/s10844-017-0466-3/
doi:https://doi.org/10.1007/s10844-017-0466-3
Close
Thuan, Vo-Duc; Bagheri, Ebrahim
Relation Extraction using Clause Patterns and Self-Training Proceedings Article
In: 17th International Conference on Intelligent Text Processing and Computational Linguistics (CICLing'16), 2016.
Links | BibTeX | Tags:
@inproceedings{thuan2016-1,
title = {Relation Extraction using Clause Patterns and Self-Training},
author = {Vo-Duc Thuan and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/j-ipm-2017-02-009/},
doi = {10.1016/j.ipm.2017.02.009},
year = {2016},
date = {2016-01-01},
urldate = {2016-01-01},
booktitle = {17th International Conference on Intelligent Text Processing and Computational Linguistics (CICLing'16)},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
https://ls3.rnet.torontomu.ca/j-ipm-2017-02-009/
doi:https://doi.org/10.1016/j.ipm.2017.02.009
Close
Fani, Hossein
Temporal Formation and Evolution of Online Communities Proceedings Article
In: Proceedings of the Ninth ACM International Conference on Web Search and Data Mining, San Francisco, CA, USA, February 22-25, 2016, pp. 717, 2016.
Links | BibTeX | Tags:
@inproceedings{DBLP:conf/wsdm/Fani16,
title = {Temporal Formation and Evolution of Online Communities},
author = {Hossein Fani},
url = {https://ls3.rnet.torontomu.ca/2835776-2855089/},
doi = {10.1145/2835776.2855089},
year = {2016},
date = {2016-01-01},
urldate = {2016-01-01},
booktitle = {Proceedings of the Ninth ACM International Conference on Web Search
and Data Mining, San Francisco, CA, USA, February 22-25, 2016},
pages = {717},
crossref = {DBLP:conf/wsdm/2016},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
https://ls3.rnet.torontomu.ca/2835776-2855089/
doi:http://doi.acm.org/10.1145/2835776.2855089
Close
Fani, Hossein; Zarrinkalam, Fattane; Bagheri, Ebrahim; Du, Weichang
Time-Sensitive Topic-Based Communities on Twitter Proceedings Article
In: Advances in Artificial Intelligence - 29th Canadian Conference on Artificial Intelligence, Canadian AI 2016, Victoria, BC, Canada, May 31 - June 3, 2016. Proceedings, pp. 192–204, 2016.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/ai/FaniZBD16,
title = {Time-Sensitive Topic-Based Communities on Twitter},
author = {Hossein Fani and Fattane Zarrinkalam and Ebrahim Bagheri and Weichang Du},
url = {https://hosseinfani.github.io/res/papers/Time-Sensitive%20Topic-Based%20Communities%20on%20Twitter.pdf},
doi = {10.1007/978-3-319-34111-8_25},
year = {2016},
date = {2016-01-01},
urldate = {2016-01-01},
booktitle = {Advances in Artificial Intelligence - 29th Canadian Conference on
Artificial Intelligence, Canadian AI 2016, Victoria, BC, Canada,
May 31 - June 3, 2016. Proceedings},
pages = {192--204},
crossref = {DBLP:conf/ai/2016},
abstract = {This paper tackles the problem of detecting temporal content-based user communities from Twitter. Most existing content-based community detection methods consider the users who share similar topical interests to be like-minded and use this as a basis to group the users. However, such approaches overlook the potential temporality of users’ interests. In this paper, we propose to identify time-sensitive topic-based communities of users who have similar temporal tendency with regards to their topics of interest. The identification of such communities provides the potential for improving the quality of community-level studies, such as personalized recommendations and marketing campaigns that are sensitive to time. To this end, we propose a graph-based framework that utilizes multivariate time series analysis to represent users’ temporal behavior towards their topics of interest in order to identify like-minded users. Further, Topic over Time (TOT) topic model that jointly captures keyword co-occurrences and locality of those patterns over time is utilized to discover users’ topics of interest. Experimental results on our Twitter dataset demonstrates the effectiveness of our proposed temporal approach in the context of personalized news recommendation and timestamp prediction compared to non-temporal community detection methods.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
This paper tackles the problem of detecting temporal content-based user communities from Twitter. Most existing content-based community detection methods consider the users who share similar topical interests to be like-minded and use this as a basis to group the users. However, such approaches overlook the potential temporality of users’ interests. In this paper, we propose to identify time-sensitive topic-based communities of users who have similar temporal tendency with regards to their topics of interest. The identification of such communities provides the potential for improving the quality of community-level studies, such as personalized recommendations and marketing campaigns that are sensitive to time. To this end, we propose a graph-based framework that utilizes multivariate time series analysis to represent users’ temporal behavior towards their topics of interest in order to identify like-minded users. Further, Topic over Time (TOT) topic model that jointly captures keyword co-occurrences and locality of those patterns over time is utilized to discover users’ topics of interest. Experimental results on our Twitter dataset demonstrates the effectiveness of our proposed temporal approach in the context of personalized news recommendation and timestamp prediction compared to non-temporal community detection methods.
Close
https://hosseinfani.github.io/res/papers/Time-Sensitive%20Topic-Based%20Communit[...]
doi:10.1007/978-3-319-34111-8_25
Close
2015
Gasevic, Dragan; Rizvi, Buturab; Bagheri, Ebrahim
A Systematic Review of Distributed Agile Software Engineering Journal Article
In: Journal of Software: Evolution and Process, vol. 27, no. 10, pp. 723-762, 2015.
Abstract | Links | BibTeX | Tags:
@article{jsme15,
title = {A Systematic Review of Distributed Agile Software Engineering},
author = {Dragan Gasevic and Buturab Rizvi and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/smr-1718/},
doi = {10.1002/smr.1718},
year = {2015},
date = {2015-01-01},
urldate = {2015-01-01},
journal = {Journal of Software: Evolution and Process},
volume = {27},
number = {10},
pages = {723--762},
abstract = {Context: The combination of Agile methods and distributed software development via remote teams represents an emerging approach to addressing the challenges such as late feedback, slow project timelines, and high cost, typically associated with software development projects. However, when projects are implemented using an Agile model with distributed human resources, there are a number of challenges that need to be considered and mitigated.
Objectives: The objectives of our work are multifold. First, we would like to understand the reasons and conditions that lead to the adoption of distributed agile software engineering practices. Second, we would like to investigate and find out the most important risks that threaten a distributed agile software engineering approach and what mitigation strategies exist to address them. Finally, would like to highlight which of the available approaches among the existing agile methodologies have been successfully adopted by the community. We intend to solidify our findings by exploring the strength of the evidence that has been reported in the literature.
Methods: We carried out a systematic literature review of Distributed Agile Software Engineering techniques and approaches reported from January 1, 2007 until September 31, 2012. The adopted method follows the well-established guidelines in the literature for conducting systematic literature reviews.
Results: Sixty-three distinct studies were selected and analyzed according to the inclusion and exclusion criteria, which focused on identifying only those studies from the literature that had a significant empirical or experimental aspect to them. The results revealed a significant number of scenarios reported by the industry that documented challenges and solutions in a Distributed Agile setting. Communication, Coordination, Collaboration, and Cultural issues were listed as being the areas where challenges exist.
Conclusions: This systematic literature review found time zone difference, knowledge of resources, lack of infrastructure, missing roles and responsibilities as being the primary challenges that needed to be addressed. In terms of solutions, most papers had recommended having a good infrastructure in place for communication, encouraging team members to engage in formal and informal communications, having more face-to-face visits, training human resources on DASE and organizational practices, policies, procedures, and utilizing tools to enhance the collaboration experience, Additionally, this research provides recommendations to help improve the current state of reporting findings and results in the Distributed Agile Software Engineering domain.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Context: The combination of Agile methods and distributed software development via remote teams represents an emerging approach to addressing the challenges such as late feedback, slow project timelines, and high cost, typically associated with software development projects. However, when projects are implemented using an Agile model with distributed human resources, there are a number of challenges that need to be considered and mitigated.
Objectives: The objectives of our work are multifold. First, we would like to understand the reasons and conditions that lead to the adoption of distributed agile software engineering practices. Second, we would like to investigate and find out the most important risks that threaten a distributed agile software engineering approach and what mitigation strategies exist to address them. Finally, would like to highlight which of the available approaches among the existing agile methodologies have been successfully adopted by the community. We intend to solidify our findings by exploring the strength of the evidence that has been reported in the literature.
Methods: We carried out a systematic literature review of Distributed Agile Software Engineering techniques and approaches reported from January 1, 2007 until September 31, 2012. The adopted method follows the well-established guidelines in the literature for conducting systematic literature reviews.
Results: Sixty-three distinct studies were selected and analyzed according to the inclusion and exclusion criteria, which focused on identifying only those studies from the literature that had a significant empirical or experimental aspect to them. The results revealed a significant number of scenarios reported by the industry that documented challenges and solutions in a Distributed Agile setting. Communication, Coordination, Collaboration, and Cultural issues were listed as being the areas where challenges exist.
Conclusions: This systematic literature review found time zone difference, knowledge of resources, lack of infrastructure, missing roles and responsibilities as being the primary challenges that needed to be addressed. In terms of solutions, most papers had recommended having a good infrastructure in place for communication, encouraging team members to engage in formal and informal communications, having more face-to-face visits, training human resources on DASE and organizational practices, policies, procedures, and utilizing tools to enhance the collaboration experience, Additionally, this research provides recommendations to help improve the current state of reporting findings and results in the Distributed Agile Software Engineering domain.
Close
https://ls3.rnet.torontomu.ca/smr-1718/
doi:10.1002/smr.1718
Close
Bagheri, Ebrahim; Gasevic, Dragan; Cuzzola, John; Jovanovic, Jelena
Automated Classification and Localization of Daily Deal Content from the Web Journal Article
In: Applied Soft Computing, vol. 31, pp. 241-256, 2015.
Abstract | Links | BibTeX | Tags:
@article{asoc2015,
title = {Automated Classification and Localization of Daily Deal Content from the Web},
author = {Ebrahim Bagheri and Dragan Gasevic and John Cuzzola and Jelena Jovanovic},
url = {https://ls3.rnet.torontomu.ca/asc2015/},
doi = {10.1016/j.asoc.2015.02.029},
year = {2015},
date = {2015-01-01},
urldate = {2015-01-01},
journal = {Applied Soft Computing},
volume = {31},
pages = {241--256},
abstract = {Websites offering daily deal offers have received widespread attention from the end-users. The objective of such Websites is to provide time limited discounts on goods and services in the hope of enticing more customers to purchase such goods or services. The success of daily deal Websites has given rise to meta-level daily deal aggregator services that collect daily deal information from across the Web. Due to some of the unique characteristics of daily deal Websites such as high update frequency, time sensitivity, and lack of coherent information representation, many deal aggregators rely on human intervention to identify and extract deal information. In this paper, we propose an approach where daily deal information is identified, classified and properly segmented and localized. Our approach is based on a semi-supervised method that uses sentence-level features of daily deal information on a given Web page. Our work offers i) a set of computationally inexpensive discriminative features that are able to effectively distinguish Web pages that contain daily deal information; ii) the construction and systematic evaluation of machine learning techniques based on these features to automatically classify daily deal Web pages; and iii) the development of an accurate segmentation algorithm that is able to localize and extract individual deals from within a complex Web page. We have extensively evaluated our approach from different perspectives, the results of which show notable performance.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Websites offering daily deal offers have received widespread attention from the end-users. The objective of such Websites is to provide time limited discounts on goods and services in the hope of enticing more customers to purchase such goods or services. The success of daily deal Websites has given rise to meta-level daily deal aggregator services that collect daily deal information from across the Web. Due to some of the unique characteristics of daily deal Websites such as high update frequency, time sensitivity, and lack of coherent information representation, many deal aggregators rely on human intervention to identify and extract deal information. In this paper, we propose an approach where daily deal information is identified, classified and properly segmented and localized. Our approach is based on a semi-supervised method that uses sentence-level features of daily deal information on a given Web page. Our work offers i) a set of computationally inexpensive discriminative features that are able to effectively distinguish Web pages that contain daily deal information; ii) the construction and systematic evaluation of machine learning techniques based on these features to automatically classify daily deal Web pages; and iii) the development of an accurate segmentation algorithm that is able to localize and extract individual deals from within a complex Web page. We have extensively evaluated our approach from different perspectives, the results of which show notable performance.
Close
https://ls3.rnet.torontomu.ca/asc2015/
doi:10.1016/j.asoc.2015.02.029
Close
Jovanovic, Jelena; Bagheri, Ebrahim; Gasevic, Dragan
Comprehension and Learning of Social Goals through Visualization Journal Article
In: IEEE Transactions on Human-Machine Systems, 2015.
Abstract | Links | BibTeX | Tags:
@article{THMS2015,
title = {Comprehension and Learning of Social Goals through Visualization},
author = {Jelena Jovanovic and Ebrahim Bagheri and Dragan Gasevic},
url = {https://ls3.rnet.torontomu.ca/comprehension_and_learning_of_social_goa/},
doi = {10.1109/THMS.2015.2419083},
year = {2015},
date = {2015-01-01},
urldate = {2015-01-01},
journal = {IEEE Transactions on Human-Machine Systems},
abstract = {The concept of social goals refers to organizational goals that are defined in an open and transparent manner; they serve as social objects that incite both formal and informal collaboration around shared interests/objectives. Our objective is to facilitate the comprehension of social goals and examine the role of social goals as scaffolds of social learning in an organization. To this end, we followed an approach based on the visualization of social goals and explored how different presentations of goals, specifically, faceted goal browsing, graph - based visualization and timeline - based visualization, contribute to the realization of the stated objective. To assess this approach we conducted a between subjects study where each participant performed a set of goal comprehension tasks with one of the examined presentations of goals. The study demonstrated that our visualizations of goals increase the accuracy of the overall comprehension of an organizations goals; this positive effect is also present when the comprehension of relationship-either explicit or implicit tie-among social goals is needed. The results also confirmed that our graph-based visualization of social goals could serve as a facilitator of social learning in an organization.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
The concept of social goals refers to organizational goals that are defined in an open and transparent manner; they serve as social objects that incite both formal and informal collaboration around shared interests/objectives. Our objective is to facilitate the comprehension of social goals and examine the role of social goals as scaffolds of social learning in an organization. To this end, we followed an approach based on the visualization of social goals and explored how different presentations of goals, specifically, faceted goal browsing, graph - based visualization and timeline - based visualization, contribute to the realization of the stated objective. To assess this approach we conducted a between subjects study where each participant performed a set of goal comprehension tasks with one of the examined presentations of goals. The study demonstrated that our visualizations of goals increase the accuracy of the overall comprehension of an organizations goals; this positive effect is also present when the comprehension of relationship-either explicit or implicit tie-among social goals is needed. The results also confirmed that our graph-based visualization of social goals could serve as a facilitator of social learning in an organization.
Close
https://ls3.rnet.torontomu.ca/comprehension_and_learning_of_social_goa/
doi:10.1109/THMS.2015.2419083
Close
Jovanovic, Jelena; Bagheri, Ebrahim
Electronic commerce meets the Semantic Web Journal Article
In: IEEE IT Professional, 2015.
Abstract | Links | BibTeX | Tags:
@article{itpro2015,
title = {Electronic commerce meets the Semantic Web},
author = {Jelena Jovanovic and Ebrahim Bagheri},
url = {https://ls3.rnet.torontomu.ca/electronic_commerce_meets_the_semantic_web_compressed/},
doi = {10.1109/MITP.2016.56},
year = {2015},
date = {2015-01-01},
urldate = {2015-01-01},
journal = {IEEE IT Professional},
abstract = {Today's online retailers are facing many challenges, some of which are related to the efficient and effective integration, use and maintenance of product and customer data. Technologies that make e-commerce data machine comprehensible could greatly assist in overcoming e-commerce data management challenges. In this paper, we look into the intersection of Semantic Web technologies and B2C e-commerce, and explore the benefits that can be reaped by both online retailers and customers. In particular, we propose a systematic framework that highlights why and how the adoption of Semantic Web technologies can enhance B2C applications and platforms. The framework is intended primarily for e-commerce decision makers and practitioners, to help them make more informed decisions on how to address e-commerce data management challenges using Semantic Web Technologies.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Today's online retailers are facing many challenges, some of which are related to the efficient and effective integration, use and maintenance of product and customer data. Technologies that make e-commerce data machine comprehensible could greatly assist in overcoming e-commerce data management challenges. In this paper, we look into the intersection of Semantic Web technologies and B2C e-commerce, and explore the benefits that can be reaped by both online retailers and customers. In particular, we propose a systematic framework that highlights why and how the adoption of Semantic Web technologies can enhance B2C applications and platforms. The framework is intended primarily for e-commerce decision makers and practitioners, to help them make more informed decisions on how to address e-commerce data management challenges using Semantic Web Technologies.
Close
https://ls3.rnet.torontomu.ca/electronic_commerce_meets_the_semantic_web_compres[...]
doi:10.1109/MITP.2016.56
Close
Bagheri, Ebrahim; Gasevic, Dragan; Cuzzola, John; Jovanovic, Jelena
Evolutionary Fine-Tuning of Automated Semantic Annotation Systems Journal Article
In: Expert Systems with Applications, vol. 42, no. 20, pp. 6864–6877, 2015.
Abstract | Links | BibTeX | Tags:
@article{eswa15,
title = {Evolutionary Fine-Tuning of Automated Semantic Annotation Systems},
author = {Ebrahim Bagheri and Dragan Gasevic and John Cuzzola and Jelena Jovanovic},
url = {https://ls3.rnet.torontomu.ca/j-eswa-2015-04-054/},
doi = {10.1016/j.eswa.2015.04.054},
year = {2015},
date = {2015-01-01},
urldate = {2015-01-01},
journal = {Expert Systems with Applications},
volume = {42},
number = {20},
pages = {6864--6877},
abstract = {Considering the ever-increasing speed at which new textual content is generated, the efficient and effective use of large text corpora requires automated natural language processing and text analysis tools. A subset of such tools, namely automated semantic annotation tools, are capable of interlinking syntactical forms of text with their underlying semantic concepts. The performance of semantic annotation tools depends on the characteristics of the annotation task, primarily characteristics of the text to be annotated, but also some other task-related features, such as the acceptable precision/recall trade-off. Therefore, these tools need to be optimally configured in order to give their best results when applied to different annotation tasks. However, the configuration of semantic annotators is presently a tedious and time-consuming task due to numerous potential configurations. In this paper, we propose an architecture and a genetic algorithm-based method for automating the task of configuring parameter values of any automated semantic annotation tool. This is a novel and unique solution as, to our knowledge, no solution to the problem of configuring semantic annotators has been reported in the literature so far. Our experiments demonstrate our proposed work enables effective configuration of parameters of any semantic annotator system.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Considering the ever-increasing speed at which new textual content is generated, the efficient and effective use of large text corpora requires automated natural language processing and text analysis tools. A subset of such tools, namely automated semantic annotation tools, are capable of interlinking syntactical forms of text with their underlying semantic concepts. The performance of semantic annotation tools depends on the characteristics of the annotation task, primarily characteristics of the text to be annotated, but also some other task-related features, such as the acceptable precision/recall trade-off. Therefore, these tools need to be optimally configured in order to give their best results when applied to different annotation tasks. However, the configuration of semantic annotators is presently a tedious and time-consuming task due to numerous potential configurations. In this paper, we propose an architecture and a genetic algorithm-based method for automating the task of configuring parameter values of any automated semantic annotation tool. This is a novel and unique solution as, to our knowledge, no solution to the problem of configuring semantic annotators has been reported in the literature so far. Our experiments demonstrate our proposed work enables effective configuration of parameters of any semantic annotator system.
Close
https://ls3.rnet.torontomu.ca/j-eswa-2015-04-054/
doi:10.1016/j.eswa.2015.04.054
Close
Fani, Hossein; Bagheri, Ebrahim
An Ontology for Describing Security Events Proceedings Article
In: The 27th International Conference on Software Engineering and Knowledge Engineering, SEKE 2015, Wyndham Pittsburgh University Center, Pittsburgh, PA, USA, July 6-8, 2015, pp. 455–460, 2015.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/seke/FaniB15,
  author    = {Hossein Fani and Ebrahim Bagheri},
  title     = {An Ontology for Describing Security Events},
  booktitle = {The 27th International Conference on Software Engineering and Knowledge
Engineering, SEKE 2015, Wyndham Pittsburgh University Center, Pittsburgh,
PA, USA, July 6-8, 2015},
  pages     = {455--460},
  year      = {2015},
  date      = {2015-01-01},
  doi       = {10.18293/SEKE2015-101},
  url       = {https://ls3.rnet.torontomu.ca/seke15paper_101/},
  urldate   = {2015-01-01},
  crossref  = {DBLP:conf/seke/2015},
  abstract  = {Mining security events helps with better precautionary planning for community safety. However, incident records are expressed in diverse and application dependent formats which impedes common comprehension for automatic knowledge extraction and reasoning. In this paper, we present Security Incident Ontology, SIO, a novel light-weight domain ontology for security incidents. We use Timeline to annotate the temporal facts of incidents and adopt Event to represent any security issues from indecent behavior to assault to more adverse crime which raise the security alarm in a community. It will present a unique way to the security incident detectors, a police officer, Robocops, or intelligent CCTV cameras, to report security events. We use SIO in populating security incident notifications of Integrated Risk Management (IRM) at Ryerson University to evaluate its competency, for Ryerson University campus has both business and housing area in the vicinity and encompass not only high rate, but also wide variety of different security issues. SIO is developed in OWL 2 with Protégé.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Mining security events helps with better precautionary planning for community safety. However, incident records are expressed in diverse and application dependent formats which impedes common comprehension for automatic knowledge extraction and reasoning. In this paper, we present Security Incident Ontology, SIO, a novel light-weight domain ontology for security incidents. We use Timeline to annotate the temporal facts of incidents and adopt Event to represent any security issues from indecent behavior to assault to more adverse crime which raise the security alarm in a community. It will present a unique way to the security incident detectors, a police officer, Robocops, or intelligent CCTV cameras, to report security events. We use SIO in populating security incident notifications of Integrated Risk Management (IRM) at Ryerson University to evaluate its competency, for Ryerson University campus has both business and housing area in the vicinity and encompass not only high rate, but also wide variety of different security issues. SIO is developed in OWL 2 with Protégé.
Close
https://ls3.rnet.torontomu.ca/seke15paper_101/
doi:10.18293/SEKE2015-101
Close
Bagheri, Ebrahim; Cuzzola, John; Jovanovic, Jelena
Filtering Inaccurate Entity Co-references on the Linked Open Data Proceedings Article
In: 26th International Conference on Database and Expert Systems Applications (DEXA2015), Springer, 2015.
Abstract | Links | BibTeX | Tags:
@inproceedings{dexa2015a,
title = {Filtering Inaccurate Entity Co-references on the Linked Open Data},
author = {Ebrahim Bagheri and John Cuzzola and Jelena Jovanovic},
url = {https://ls3.rnet.torontomu.ca/cuzzola2015_compressed/},
doi = {10.1007/978-3-319-22849-5_10},
year = {2015},
date = {2015-01-01},
urldate = {2015-01-01},
booktitle = {26th International Conference on Database and Expert Systems Applications (DEXA2015)},
publisher = {Springer},
abstract = {The Linked Open Data (LOD) initiative relies heavily on the interconnections between different open RDF datasets where RDF links are used to connect resources. There has already been substantial research on identifying identity links between resources from different datasets, a process that is often referred to as co-reference resolution. These techniques often rely on probabilistic models or inference mechanisms to detect identity relations. However, recent studies have shown considerable inaccuracies in the LOD datasets that pertain to identity relations, e.g., owl:sameAs relations. In this paper, we propose a technique that evaluates existing identity links between LOD resources and identifies potentially erroneous links. Our work relies on the position and relevance of each resource with regards to the associated DBpedia categories modeled through two probabilistic category distribution and selection functions. Our experimental results show that our work is able to semantically distinguish inaccurate identity links even in cases when high syntactical similarity is observed between two resources.

},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
The Linked Open Data (LOD) initiative relies heavily on the interconnections between different open RDF datasets where RDF links are used to connect resources. There has already been substantial research on identifying identity links between resources from different datasets, a process that is often referred to as co-reference resolution. These techniques often rely on probabilistic models or inference mechanisms to detect identity relations. However, recent studies have shown considerable inaccuracies in the LOD datasets that pertain to identity relations, e.g., owl:sameAs relations. In this paper, we propose a technique that evaluates existing identity links between LOD resources and identifies potentially erroneous links. Our work relies on the position and relevance of each resource with regards to the associated DBpedia categories modeled through two probabilistic category distribution and selection functions. Our experimental results show that our work is able to semantically distinguish inaccurate identity links even in cases when high syntactical similarity is observed between two resources.

Close
https://ls3.rnet.torontomu.ca/cuzzola2015_compressed/
doi:10.1007/978-3-319-22849-5_10
Close
Feng, Yue; Fani, Hossein; Bagheri, Ebrahim; Jovanovic, Jelena
Lexical Semantic Relatedness for Twitter Analytics Proceedings Article
In: 27th IEEE International Conference on Tools with Artificial Intelligence, ICTAI 2015, Vietri sul Mare, Italy, November 9-11, 2015, pp. 202–209, 2015.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/ictai/FengFBJ15,
  author    = {Yue Feng and Hossein Fani and Ebrahim Bagheri and Jelena Jovanovic},
  title     = {Lexical Semantic Relatedness for Twitter Analytics},
  booktitle = {27th IEEE International Conference on Tools with Artificial Intelligence,
ICTAI 2015, Vietri sul Mare, Italy, November 9-11, 2015},
  pages     = {202--209},
  year      = {2015},
  date      = {2015-01-01},
  doi       = {10.1109/ICTAI.2015.41},
  url       = {https://ls3.rnet.torontomu.ca/lexical-semantic-relatedness-for-twitter-analytics/},
  urldate   = {2015-01-01},
  crossref  = {DBLP:conf/ictai/2015},
  abstract  = {Existing work in the semantic relatedness literature has already considered various information sources such as WordNet, Wikipedia and Web search engines to identify the semantic relatedness between two words. We will show that existing semantic relatedness measures might not be directly applicable to microblogging content such as tweets due to i) the informality and short length of microblogging content, which can lead to shift in the meaning of words when used in microblog posts, ii) the presence of non-dictionary words that have their semantics defined/evolved by the Twitter community. Therefore, we propose the Twitter Space Semantic Relatedness (TSSR) technique that relies on the latent relation hypothesis to measure semantic relatedness of words on Twitter. We construct a graph representation of terms in tweets and apply a random walk procedure to produce a stationary distribution for each word, which is the basis for relatedness calculation. Our experiments examine TSSR from three different perspectives and show that TSSR is better suited for Twitter analytics compared to the standard semantic relatedness techniques.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Existing work in the semantic relatedness literature has already considered various information sources such as WordNet, Wikipedia and Web search engines to identify the semantic relatedness between two words. We will show that existing semantic relatedness measures might not be directly applicable to microblogging content such as tweets due to i) the informality and short length of microblogging content, which can lead to shift in the meaning of words when used in microblog posts, ii) the presence of non-dictionary words that have their semantics defined/evolved by the Twitter community. Therefore, we propose the Twitter Space Semantic Relatedness (TSSR) technique that relies on the latent relation hypothesis to measure semantic relatedness of words on Twitter. We construct a graph representation of terms in tweets and apply a random walk procedure to produce a stationary distribution for each word, which is the basis for relatedness calculation. Our experiments examine TSSR from three different perspectives and show that TSSR is better suited for Twitter analytics compared to the standard semantic relatedness techniques.
Close
https://ls3.rnet.torontomu.ca/lexical-semantic-relatedness-for-twitter-analytics[...]
doi:10.1109/ICTAI.2015.41
Close
Bagheri, Ebrahim; Hassankiadeh, Asef Pourmasoumi; Kahani, Mohsen; Asadi, Mohsen
On Business Process Variants Generation Proceedings Article
In: CAiSE Forum, Springer, 2015.
Abstract | Links | BibTeX | Tags:
@inproceedings{caise2015,
title = {On Business Process Variants Generation},
author = {Ebrahim Bagheri and Asef Pourmasoumi Hassankiadeh and Mohsen Kahani and Mohsen Asadi},
url = {https://ls3.rnet.torontomu.ca/paper-24-1/},
year = {2015},
date = {2015-01-01},
urldate = {2015-01-01},
booktitle = {CAiSE Forum},
publisher = {Springer},
abstract = {Cross-organizational mining is a new research field in the process mining domain, which focuses on the analysis and mining of processes in multiple organizations. Suitable access to collections of business process variants is necessary for researchers to evaluate their work in this research domain. To the best of our knowledge, no complete collection of process variants or any process variants/log generator tool exists for this purpose. In this paper, we propose an algorithm for generating random process variants for a given process model and a supporting toolset built on top of the PLG toolset. For this purpose, we classify different factors that can serve as variation points. Then, using the structure tree based representation of an input process, we present an algorithm for applying variation points based on a user-defined variation rate. The developed tool is publicly available for researchers to use.
},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Cross-organizational mining is a new research field in the process mining domain, which focuses on the analysis and mining of processes in multiple organizations. Suitable access to collections of business process variants is necessary for researchers to evaluate their work in this research domain. To the best of our knowledge, no complete collection of process variants or any process variants/log generator tool exists for this purpose. In this paper, we propose an algorithm for generating random process variants for a given process model and a supporting toolset built on top of the PLG toolset. For this purpose, we classify different factors that can serve as variation points. Then, using the structure tree based representation of an input process, we present an algorithm for applying variation points based on a user-defined variation rate. The developed tool is publicly available for researchers to use.

Close
https://ls3.rnet.torontomu.ca/paper-24-1/
Close
Ghashghaei, Mehrnaz; Cuzzola, John; Bagheri, Ebrahim; Ghorbani, Ali A
Semantic Annotation of Quantitative Textual Content Proceedings Article
In: Proceedings of the Third International Workshop on Linked Data for Information Extraction (LD4IE2015) co-located with the 14th International Semantic Web Conference (ISWC 2015), Bethlehem, Pennsylvania, USA, October 12, 2015., pp. 20–33, 2015.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/semweb/GhashghaeiCBG15,
  title     = {Semantic Annotation of Quantitative Textual Content},
  author    = {Ghashghaei, Mehrnaz and Cuzzola, John and Bagheri, Ebrahim and Ghorbani, Ali A},
  url       = {http://ceur-ws.org/Vol-1467/LD4IE2015_Ghashghaei.pdf},
  year      = {2015},
  date      = {2015-01-01},
  urldate   = {2015-01-01},
  booktitle = {Proceedings of the Third International Workshop on Linked Data for Information Extraction (LD4IE2015) co-located with the 14th International Semantic Web Conference (ISWC 2015), Bethlehem, Pennsylvania, USA, October 12, 2015},
  pages     = {20--33},
  crossref  = {DBLP:conf/semweb/2015ld4ie},
  abstract  = {Semantic annotation techniques provide the basis for linking textual content with concepts in well grounded knowledge bases. In spite of their many application areas, current semantic annotation systems have some limitations. One of the prominent limitations of such systems is that none of the existing semantic annotator systems are able to identify and disambiguate quantitative (numerical) content. In textual documents such as Web pages, specially technical contents, there are many quantitative information such as product specifications that need to be semantically qualified. In this thesis, we propose an approach for annotating quantitative values in short textual content. In our approach, we identify numeric values in the text and link them to an existing property in a knowledge base. Based on this mapping, we are then able to find the concept that the property is associated with, whereby identifying both the concept and the specific property of that concept that the numeric value belongs to. Our experiments show that our proposed approach is able to reach an accuracy of over 70\% for semantically annotating quantitative content.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Semantic annotation techniques provide the basis for linking textual content with concepts in well grounded knowledge bases. In spite of their many application areas, current semantic annotation systems have some limitations. One of the prominent limitations of such systems is that none of the existing semantic annotator systems are able to identify and disambiguate quantitative (numerical) content. In textual documents such as Web pages, specially technical contents, there are many quantitative information such as product specifications that need to be semantically qualified. In this thesis, we propose an approach for annotating quantitative values in short textual content. In our approach, we identify numeric values in the text and link them to an existing property in a knowledge base. Based on this mapping, we are then able to find the concept that the property is associated with, whereby identifying both the concept and the specific property of that concept that the numeric value belongs to. Our experiments show that our proposed approach is able to reach an accuracy of over 70% for semantically annotating quantitative content.

Close
http://ceur-ws.org/Vol-1467/LD4IE2015_Ghashghaei.pdf
Close
Zarrinkalam, Fattane; Fani, Hossein; Bagheri, Ebrahim; Kahani, Mohsen; Du, Weichang
Semantics-Enabled User Interest Detection from Twitter Proceedings Article
In: IEEE/WIC/ACM International Conference on Web Intelligence and Intelligent Agent Technology, WI-IAT 2015, Singapore, December 6-9, 2015 - Volume I, pp. 469–476, 2015.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/webi/ZarrinkalamFBKD15,
  title     = {Semantics-Enabled User Interest Detection from Twitter},
  author    = {Zarrinkalam, Fattane and Fani, Hossein and Bagheri, Ebrahim and Kahani, Mohsen and Du, Weichang},
  url       = {https://ls3.rnet.torontomu.ca/semantics-enableduserinterestdetectionfromtwitter/},
  doi       = {10.1109/WI-IAT.2015.182},
  year      = {2015},
  date      = {2015-01-01},
  urldate   = {2015-01-01},
  booktitle = {IEEE/WIC/ACM International Conference on Web Intelligence and Intelligent Agent Technology, WI-IAT 2015, Singapore, December 6-9, 2015 - Volume I},
  pages     = {469--476},
  crossref  = {DBLP:conf/webi/2015-1},
  abstract  = {Social networks enable users to freely communicate with each other and share their recent news, ongoing activities or views about different topics. As a result, user interest detection from social networks has been the subject of increasing attention. Some recent works have proposed to enrich social posts by annotating them with unambiguous relevant ontological concepts extracted from external knowledge bases and model user interests as a bag of concepts. However, in the bag of concepts approach, each topic of interest is represented as an individual concept that is already predefined in the knowledge base. Therefore, it is not possible to infer fine-grained topics of interest, which are only expressible through a collection of multiple concepts or emerging topics, which are not yet defined in the knowledge base. To address these issues, we view each topic of interest as a conjunction of several concepts, which are temporally correlated on Twitter. Based on this, we extract active topics within a given time interval and determine a user's inclination towards these active topics. We demonstrate the effectiveness of our approach in the context of a personalized news recommendation system. We show through extensive experimentation that our work is able to improve the state of the art.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Social networks enable users to freely communicate with each other and share their recent news, ongoing activities or views about different topics. As a result, user interest detection from social networks has been the subject of increasing attention. Some recent works have proposed to enrich social posts by annotating them with unambiguous relevant ontological concepts extracted from external knowledge bases and model user interests as a bag of concepts. However, in the bag of concepts approach, each topic of interest is represented as an individual concept that is already predefined in the knowledge base. Therefore, it is not possible to infer fine-grained topics of interest, which are only expressible through a collection of multiple concepts or emerging topics, which are not yet defined in the knowledge base. To address these issues, we view each topic of interest as a conjunction of several concepts, which are temporally correlated on Twitter. Based on this, we extract active topics within a given time interval and determine a user's inclination towards these active topics. We demonstrate the effectiveness of our approach in the context of a personalized news recommendation system. We show through extensive experimentation that our work is able to improve the state of the art.
Close
https://ls3.rnet.torontomu.ca/semantics-enableduserinterestdetectionfromtwitter/
doi:10.1109/WI-IAT.2015.182
Close
Au Duong, Jacky; Mager, Alanna; Bagheri, Ebrahim; Zeller, Frauke; Smith, David Harris; Rudzicz, Frank
Social Media in Human-Robot Interaction Proceedings Article
In: Social Media & Society Conference (#SMSociety15), 2015.
Links | BibTeX | Tags:
@inproceedings{SMSociety15,
  title     = {Social Media in Human-Robot Interaction},
  author    = {Au Duong, Jacky and Mager, Alanna and Bagheri, Ebrahim and Zeller, Frauke and Smith, David Harris and Rudzicz, Frank},
  url       = {https://socialmediaandsociety.com/},
  year      = {2015},
  date      = {2015-01-01},
  booktitle = {Social Media \& Society Conference (\#SMSociety15)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
https://socialmediaandsociety.com/
Close
Vo, Duc-Thuan; Bagheri, Ebrahim
Syntactic and Semantic Structures for Relation Extraction Proceedings Article
In: Sixth BCS-IRSG Symposium on Future Directions in Information Access, FDIA 2015, 31 August - 4 September 2015, Thessaloniki, Greece, 2015.
Links | BibTeX | Tags:
@inproceedings{DBLP:conf/fdia/VoB15,
  title     = {Syntactic and Semantic Structures for Relation Extraction},
  author    = {Vo, Duc-Thuan and Bagheri, Ebrahim},
  url       = {https://ls3.rnet.torontomu.ca/024_vo/},
  doi       = {10.14236/ewic/FDIA2015.7},
  year      = {2015},
  date      = {2015-01-01},
  urldate   = {2015-01-01},
  booktitle = {Sixth BCS-IRSG Symposium on Future Directions in Information Access, FDIA 2015, 31 August - 4 September 2015, Thessaloniki, Greece},
  crossref  = {DBLP:conf/fdia/2015},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
https://ls3.rnet.torontomu.ca/024_vo/
doi:10.14236/ewic/FDIA2015.7
Close
2014
Asadi, Mohsen; Du, Weichang; Noorian, Mahdi; Bagheri, Ebrahim
Addressing Non-Functional Properties in Feature Models: A Goal-Oriented Approach Journal Article
In: International Journal of Software Engineering and Knowledge Engineering, vol. 24, no. 10, pp. 1439-1488, 2014.
Abstract | BibTeX | Tags:
@article{IJSEKE2014,
  title     = {Addressing Non-Functional Properties in Feature Models: A Goal-Oriented Approach},
  author    = {Asadi, Mohsen and Du, Weichang and Noorian, Mahdi and Bagheri, Ebrahim},
  year      = {2014},
  date      = {2014-01-01},
  journal   = {International Journal of Software Engineering and Knowledge Engineering},
  volume    = {24},
  number    = {10},
  pages     = {1439--1488},
  abstract  = {Software Product Line (SPL) engineering is a systematic reuse-based software development approach which is founded on the idea of building software products using a set of core assets rather than developing individual software systems from scratch. Feature models are among the widely used artefacts for SPL development that mostly capture functional and operational variability of a system. Researchers have argued that connecting intentional variability models such as goal models with feature variability models in a target domain can enrich feature models with valuable quality and non-functional information. Interrelating goal models and feature models has already been proposed in the literature for capturing non-functional properties in software product lines; however this manual integration process is cumbersome and tedious. In this paper, we propose a (semi) automated approach that systematically integrates feature models and goal models through standard ontologies. Our proposed approach connects feature model and goal model elements through measuring the semantic similarity of their annotated ontological concepts. Our work not only provides the means to systematically interrelate feature models and goal models but also allows domain engineers to identify and model the role and significance of non-functional properties in the domain represented by the feature model.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Software Product Line (SPL) engineering is a systematic reuse-based software development approach which is founded on the idea of building software products using a set of core assets rather than developing individual software systems from scratch. Feature models are among the widely used artefacts for SPL development that mostly capture functional and operational variability of a system. Researchers have argued that connecting intentional variability models such as goal models with feature variability models in a target domain can enrich feature models with valuable quality and non-functional information. Interrelating goal models and feature models has already been proposed in the literature for capturing non-functional properties in software product lines; however this manual integration process is cumbersome and tedious. In this paper, we propose a (semi) automated approach that systematically integrates feature models and goal models through standard ontologies. Our proposed approach connects feature model and goal model elements through measuring the semantic similarity of their annotated ontological concepts. Our work not only provides the means to systematically interrelate feature models and goal models but also allows domain engineers to identify and model the role and significance of non-functional properties in the domain represented by the feature model.
Close
Jovanovic, Jelena; Bagheri, Ebrahim; Cuzzola, John; Gasevic, Dragan; Jeremic, Zoran; Bashash, Reza
Automated Semantic Tagging of Textual Content Journal Article
In: IEEE IT Professional, vol. 16, no. 6, pp. 38-46, 2014.
Abstract | BibTeX | Tags:
@article{ITPRO2014,
  title     = {Automated Semantic Tagging of Textual Content},
  author    = {Jovanovic, Jelena and Bagheri, Ebrahim and Cuzzola, John and Gasevic, Dragan and Jeremic, Zoran and Bashash, Reza},
  year      = {2014},
  date      = {2014-01-01},
  journal   = {IEEE IT Professional},
  volume    = {16},
  number    = {6},
  pages     = {38--46},
  abstract  = {Motivated by a continually increasing demand for applications that depend on machine comprehension of text-based content, researchers, in both academia and industry, have developed innovative solutions for automated information extraction from text. In this article, we focus on a subset of such tools - i.e., semantic taggers - that not only extract and disambiguate entities mentioned in the text, but also identify topics that unambiguously describe the text's main themes. We offer insight into the process of semantic tagging, the capabilities and specificities of today's semantic taggers, and also indicate some of the criteria to be considered when choosing a tagger to use.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Motivated by a continually increasing demand for applications that depend on machine comprehension of text-based content, researchers, in both academia and industry, have developed innovative solutions for automated information extraction from text. In this article, we focus on a subset of such tools - i.e., semantic taggers - that not only extract and disambiguate entities mentioned in the text, but also identify topics that unambiguously describe the text's main themes. We offer insight into the process of semantic tagging, the capabilities and specificities of today's semantic taggers, and also indicate some of the criteria to be considered when choosing a tagger to use.
Close
Bagheri, Ebrahim; Ensan, Faezeh
Dynamic Decision Models for Staged Software Product Line Configuration Journal Article
In: Requirements Engineering Journal, vol. 19, no. 2, pp. 187-212, 2014.
Abstract | BibTeX | Tags:
@article{DBLP:journals/REJ/BagheriE13,
  title     = {Dynamic Decision Models for Staged Software Product Line Configuration},
  author    = {Bagheri, Ebrahim and Ensan, Faezeh},
  year      = {2014},
  date      = {2014-01-01},
  journal   = {Requirements Engineering Journal},
  volume    = {19},
  number    = {2},
  pages     = {187--212},
  abstract  = {Software product line engineering practices offer desirable characteristics such as rapid product development, reduced time-to-market and more affordable development costs as a result of systematic representation of the variabilities of a domain of discourse that leads to methodical reuse of software assets. The development lifecycle of a product line consists of two main phases: domain engineering, which deals with the understanding and formally modeling of the target domain; and application engineering that is concerned with the configuration of a product line into one concrete product based on the preferences and requirements of the stakeholders. The work presented in this paper focuses on the application engineering phase and builds both the theoretical and technological tools to assist the stakeholders in (1) understanding the complex interactions of the features of a product line; (2) eliciting the utility of each feature for the stakeholders and hence exposing the stakeholders' otherwise implicit preferences in a way that they can more easily make decisions; and (3) dynamically building a decision model through interaction with the stakeholders and by considering the structural characteristics of software product line feature models, which will guide the stakeholders through the product configuration process. Initial exploratory empirical experiments that we have performed show that our proposed approach for helping stakeholders understand their feature preferences and its associated staged feature model configuration process is able to positively impact the quality of the end results of the application engineering process within the context of the limited number of participants. In addition, it has been observed that the offered tooling support is able to ease the staged feature model configuration process.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Software product line engineering practices offer desirable characteristics such as rapid product development, reduced time-to-market and more affordable development costs as a result of systematic representation of the variabilities of a domain of discourse that leads to methodical reuse of software assets. The development lifecycle of a product line consists of two main phases: domain engineering, which deals with the understanding and formally modeling of the target domain; and application engineering that is concerned with the configuration of a product line into one concrete product based on the preferences and requirements of the stakeholders. The work presented in this paper focuses on the application engineering phase and builds both the theoretical and technological tools to assist the stakeholders in (1) understanding the complex interactions of the features of a product line; (2) eliciting the utility of each feature for the stakeholders and hence exposing the stakeholders' otherwise implicit preferences in a way that they can more easily make decisions; and (3) dynamically building a decision model through interaction with the stakeholders and by considering the structural characteristics of software product line feature models, which will guide the stakeholders through the product configuration process. Initial exploratory empirical experiments that we have performed show that our proposed approach for helping stakeholders understand their feature preferences and its associated staged feature model configuration process is able to positively impact the quality of the end results of the application engineering process within the context of the limited number of participants. In addition, it has been observed that the offered tooling support is able to ease the staged feature model configuration process.
Close
Bashari, Mahdi; Noorian, Mahdi; Bagheri, Ebrahim
Product Line Stakeholder Preference Elicitation via Decision Processes Journal Article
In: International Journal of Knowledge and Systems Science, vol. 5, no. 4, pp. 35-51, 2014.
Abstract | BibTeX | Tags:
@article{ijkss,
  title     = {Product Line Stakeholder Preference Elicitation via Decision Processes},
  author    = {Bashari, Mahdi and Noorian, Mahdi and Bagheri, Ebrahim},
  year      = {2014},
  date      = {2014-10-01},
  journal   = {International Journal of Knowledge and Systems Science},
  volume    = {5},
  number    = {4},
  pages     = {35--51},
  abstract  = {In the software product line configuration process, certain features are selected based on the stakeholders' needs and preferences regarding the available functional and quality properties. This book chapter presents how a product configuration can be modeled as a decision process and how an optimal strategy representing the stakeholders' desirable configuration can be found. In the decision process model of product configuration, the product is configured by making decisions at a number of decision points. The decisions at each of these decision points contribute to functional and quality attributes of the final product. In order to find an optimal strategy for the decision process, a utility-based approach can be adopted, through which, the strategy with the highest utility is selected as the optimal strategy. In order to define utility for each strategy, a multi-attribute utility function is defined over functional and quality properties of a configured product and a utility elicitation process is then introduced for finding this utility function. The utility elicitation process works based on asking gamble queries over functional and quality requirement from the stakeholder. Using this utility function, the optimal strategy and therefore optimal product configuration is determined.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
In the software product line configuration process, certain features are selected based on the stakeholders' needs and preferences regarding the available functional and quality properties. This book chapter presents how a product configuration can be modeled as a decision process and how an optimal strategy representing the stakeholders' desirable configuration can be found. In the decision process model of product configuration, the product is configured by making decisions at a number of decision points. The decisions at each of these decision points contribute to functional and quality attributes of the final product. In order to find an optimal strategy for the decision process, a utility-based approach can be adopted, through which, the strategy with the highest utility is selected as the optimal strategy. In order to define utility for each strategy, a multi-attribute utility function is defined over functional and quality properties of a configured product and a utility elicitation process is then introduced for finding this utility function. The utility elicitation process works based on asking gamble queries over functional and quality requirement from the stakeholder. Using this utility function, the optimal strategy and therefore optimal product configuration is determined.
Close
Bagheri, Ebrahim; Ensan, Faezeh
Reliability Estimation for Component-based Software Product Lines Journal Article
In: IEEE/Canadian Journal of Electrical and Computer Engineering, vol. 37, no. 2, pp. 94–112, 2014.
Abstract | Links | BibTeX | Tags:
@article{CJECE2013,
  title     = {Reliability Estimation for Component-based Software Product Lines},
  author    = {Bagheri, Ebrahim and Ensan, Faezeh},
  doi       = {10.1109/CJECE.2014.2323958},
  year      = {2014},
  date      = {2014-01-01},
  journal   = {IEEE/Canadian Journal of Electrical and Computer Engineering},
  volume    = {37},
  number    = {2},
  pages     = {94--112},
  abstract  = {The objective of the software product line engineering paradigm is to enhance the large-scale reuse of common core assets within a target domain. Reuse is facilitated by systematically organizing and modeling the core assets and the relationships between them. One of the main core assets of a domain is the model for representing the available functional aspects, often known as features, within structured forms such as feature models. The selection and composition of the most suitable or desirable set of features for a given purpose allows the rapid development of new final products from the software product line. Product developers are in most cases not only interested in building applications that possess certain functional characteristics but also concerned with non-functional properties of the final product such as reliability. To this end, we propose a component-based software product line reliability estimation model that is able to provide lower and upper reliability bound guarantees for a software product line feature model, its specializations and configurations. Our model builds on top of the reliability of the individual features that are present in the product line and provides best and worst-case estimates. Our work is based on an essential and widely used assumption that features are implemented using self-contained software components or services whose reliability can be determined independently. We also propose reliability-aware configuration methods that ensure the satisfaction of both functional and reliability requirements during the application development process. We offer our observations and insight into the performance of our reliability estimation model and provide analysis of its advantages and shortcomings.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
The objective of the software product line engineering paradigm is to enhance the large-scale reuse of common core assets within a target domain. Reuse is facilitated by systematically organizing and modeling the core assets and the relationships between them. One of the main core assets of a domain is the model for representing the available functional aspects, often known as features, within structured forms such as feature models. The selection and composition of the most suitable or desirable set of features for a given purpose allows the rapid development of new final products from the software product line. Product developers are in most cases not only interested in building applications that possess certain functional characteristics but also concerned with non-functional properties of the final product such as reliability. To this end, we propose a component-based software product line reliability estimation model that is able to provide lower and upper reliability bound guarantees for a software product line feature model, its specializations and configurations. Our model builds on top of the reliability of the individual features that are present in the product line and provides best and worst-case estimates. Our work is based on an essential and widely used assumption that features are implemented using self-contained software components or services whose reliability can be determined independently. We also propose reliability-aware configuration methods that ensure the satisfaction of both functional and reliability requirements during the application development process. We offer our observations and insight into the performance of our reliability estimation model and provide analysis of its advantages and shortcomings.
Close
doi:10.1109/CJECE.2014.2323958
Close
Gasevic, Dragan; Hatala, Marek; Asadi, Mohsen; Soltani, Samaneh; Bagheri, Ebrahim
Towards Automated Feature Model Configuration With Optimizing Non-Functional Requirements Journal Article
In: Information and Software Technology Journal, vol. 56, no. 9, pp. 1144–1165, 2014.
Abstract | BibTeX | Tags:
@article{IST2014,
  title     = {Towards Automated Feature Model Configuration With Optimizing Non-Functional Requirements},
  author    = {Gasevic, Dragan and Hatala, Marek and Asadi, Mohsen and Soltani, Samaneh and Bagheri, Ebrahim},
  year      = {2014},
  date      = {2014-01-01},
  journal   = {Information and Software Technology Journal},
  volume    = {56},
  number    = {9},
  pages     = {1144--1165},
  abstract  = {Context: A Software Product Line is a family of software systems that share some common features but also have significant variabilities. A feature model is a variability modeling artifact, which represents differences among software products with respect to the variability relationships among their features. Having a feature model along with a reference model developed in the domain engineering lifecycle, a concrete product of the family is derived by binding the variation points in the feature model (called configuration process) and by instantiating the reference model. Objective: In this work we address the feature model configuration problem and propose a framework to automatically select suitable features that satisfy both the functional and non-functional preferences and constraints of stakeholders. Additionally, interdependencies between various non-functional properties are taken into account in the framework. Method: The proposed framework combines Analytical Hierarchy Process (AHP) and Fuzzy Cognitive Maps (FCM) to compute the non-functional properties weights based on stakeholders' preferences and interdependencies between non-functional properties. Afterwards, Hierarchical Task Network (HTN) planning is applied to find the optimal feature model configuration. Result: Our approach improves state-of-art of feature model configuration by considering positive or negative impacts of the features on non-functional properties, the stakeholders' preferences, and non-functional interdependencies. The approach presented in this paper extends earlier work presented in [1] from several distinct perspectives including mechanisms handling interdependencies between non-functional properties, proposing a novel tooling architecture, and offering visualization and interaction techniques for representing functional and non-functional aspects of feature models. Conclusion: Our experiments show the scalability of our configuration approach when considering both functional and non-functional requirements of stakeholders.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Context: A Software Product Line is a family of software systems that share some common features but also have significant variabilities. A feature model is a variability modeling artifact, which represents differences among software products with respect to the variability relationships among their features. Having a feature model along with a reference model developed in the domain engineering lifecycle, a concrete product of the family is derived by binding the variation points in the feature model (called configuration process) and by instantiating the reference model. Objective: In this work we address the feature model configuration problem and propose a framework to automatically select suitable features that satisfy both the functional and non-functional preferences and constraints of stakeholders. Additionally, interdependencies between various non-functional properties are taken into account in the framework. Method: The proposed framework combines Analytical Hierarchy Process (AHP) and Fuzzy Cognitive Maps (FCM) to compute the non-functional properties weights based on stakeholders' preferences and interdependencies between non-functional properties. Afterwards, Hierarchical Task Network (HTN) planning is applied to find the optimal feature model configuration. Result: Our approach improves state-of-art of feature model configuration by considering positive or negative impacts of the features on non-functional properties, the stakeholders' preferences, and non-functional interdependencies. The approach presented in this paper extends earlier work presented in [1] from several distinct perspectives including mechanisms handling interdependencies between non-functional properties, proposing a novel tooling architecture, and offering visualization and interaction techniques for representing functional and non-functional aspects of feature models.
Conclusion: Our experiments show the scalability of our configuration approach when considering both functional and non-functional requirements of stakeholders.
Close
Noorian, Mahdi; Bagheri, Ebrahim; Du, Weichang
Capturing Non-functional Properties through Model Interlinking Proceedings Article
In: IEEE Canadian Conference on Electrical and Computer Engineering (CCECE), 2014.
BibTeX | Tags:
@inproceedings{CCECE14,
  author    = {Noorian, Mahdi and Bagheri, Ebrahim and Du, Weichang},
  title     = {Capturing Non-functional Properties through Model Interlinking},
  booktitle = {IEEE Canadian Conference on Electrical and Computer Engineering (CCECE)},
  year      = {2014},
  date      = {2014-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Noorian, Mahdi; Bagheri, Ebrahim; Du, Weichang
From Intentions to Decisions: Understanding Stakeholders' Objectives in Software Product Line Configuration Proceedings Article
In: International Conference on Software Engineering and Knowledge Engineering (SEKE 2014), 2014.
BibTeX | Tags:
@inproceedings{seke2014,
  author    = {Noorian, Mahdi and Bagheri, Ebrahim and Du, Weichang},
  title     = {From Intentions to Decisions: Understanding Stakeholders' Objectives in Software Product Line Configuration},
  booktitle = {International Conference on Software Engineering and Knowledge Engineering (SEKE 2014)},
  year      = {2014},
  date      = {2014-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Kiyadeh, Asef Pourmasoumi Hasan; Kahani, Mohsen; Bagheri, Ebrahim; Asadi, Mohsen
Mining Common Morphological Fragments from Process Event Logs Proceedings Article
In: Proceedings of the 2014 Conference of the Centre for Advanced Studies on Collaborative Research (CASCON 2014), 2014.
BibTeX | Tags:
@inproceedings{cascon2014,
  author    = {Kiyadeh, Asef Pourmasoumi Hasan and Kahani, Mohsen and Bagheri, Ebrahim and Asadi, Mohsen},
  title     = {Mining Common Morphological Fragments from Process Event Logs},
  booktitle = {Proceedings of the 2014 Conference of the Centre for Advanced Studies on Collaborative Research (CASCON 2014)},
  year      = {2014},
  date      = {2014-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
2013
Ognjanovic, Ivana; Gasevic, Dragan; Bagheri, Ebrahim
A stratified framework for handling conditional preferences: An extension of the analytic hierarchy process Journal Article
In: Expert Syst. Appl., vol. 40, no. 4, pp. 1094-1115, 2013.
Abstract | BibTeX | Tags:
@article{DBLP:journals/eswa/OgnjanovicGB13,
  title     = {A stratified framework for handling conditional preferences: An extension of the analytic hierarchy process},
  author    = {Ivana Ognjanovic and Dragan Gasevic and Ebrahim Bagheri},
  year      = {2013},
  date      = {2013-01-01},
  journal   = {Expert Systems with Applications},
  volume    = {40},
  number    = {4},
  pages     = {1094--1115},
  abstract  = {Representing and reasoning over different forms of preferences is of crucial importance to many different fields, especially where numerical comparisons need to be made between critical options. Focusing on the well-known Analytical Hierarchical Process (AHP) method, we propose a two-layered framework for addressing different kinds of conditional preferences which include partial information over preferences and preferences of a lexicographic kind. The proposed formal two-layered framework, called CS-AHP, provides the means for representing and reasoning over conditional preferences. The framework can also effectively order decision outcomes based on conditional preferences in a way that is consistent with well-formed preferences. Finally, the framework provides an estimation of the potential number of violations and inconsistencies within the preferences. We provide and report extensive performance analysis for the proposed framework from three different perspectives, namely time-complexity, simulated decision making scenarios, and handling cyclic and partially defined preferences.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Representing and reasoning over different forms of preferences is of crucial importance to many different fields, especially where numerical comparisons need to be made between critical options. Focusing on the well-known Analytical Hierarchical Process (AHP) method, we propose a two-layered framework for addressing different kinds of conditional preferences which include partial information over preferences and preferences of a lexicographic kind. The proposed formal two-layered framework, called CS-AHP, provides the means for representing and reasoning over conditional preferences. The framework can also effectively order decision outcomes based on conditional preferences in a way that is consistent with well-formed preferences. Finally, the framework provides an estimation of the potential number of violations and inconsistencies within the preferences. We provide and report extensive performance analysis for the proposed framework from three different perspectives, namely time-complexity, simulated decision making scenarios, and handling cyclic and partially defined preferences.
Close
Bagheri, Ebrahim; Freitas, Fred; Santos, Luiz Olavo Bonino Silva
Forward to the special issue on vocabularies, ontologies and rules for the enterprise Journal Article
In: Information Systems, vol. 38, no. 5, pp. 688-689, 2013.
BibTeX | Tags:
@article{DBLP:journals/is/BagheriFS13,
  title     = {Forward to the special issue on vocabularies, ontologies and rules for the enterprise},
  author    = {Ebrahim Bagheri and Fred Freitas and Luiz Olavo Bonino Silva Santos},
  year      = {2013},
  date      = {2013-01-01},
  journal   = {Information Systems},
  volume    = {38},
  number    = {5},
  pages     = {688--689},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Bagheri, Ebrahim; Ensan, Faezeh
Light-Weight Software Product Lines for Small and Medium-sized Enterprises (SMEs) Proceedings Article
In: Proceedings of the 2013 Conference of the Centre for Advanced Studies on Collaborative Research (CASCON 2013), pp. 1-15, 2013.
BibTeX | Tags:
@inproceedings{DBLP:conf/cascon/BagheriE13,
  title     = {Light-Weight Software Product Lines for Small and Medium-sized Enterprises (SMEs)},
  author    = {Ebrahim Bagheri and Faezeh Ensan},
  year      = {2013},
  date      = {2013-01-01},
  booktitle = {Proceedings of the 2013 Conference of the Centre for Advanced Studies on Collaborative Research (CASCON 2013)},
  pages     = {1--15},
  url       = {http://www.informatik.uni-trier.de/~ley/db/conf/cascon/index.html},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Cuzzola, John; Bagheri, Ebrahim; Gasevic, Dragan
Product Centric Web Page Segmentation and Localization Proceedings Article
In: Canadian Semantic Web Symposium, 2013.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/CSWC/CuzzolaBG13b,
  author    = {Cuzzola, John and Bagheri, Ebrahim and Gasevic, Dragan},
  title     = {Product Centric Web Page Segmentation and Localization},
  booktitle = {Canadian Semantic Web Symposium},
  year      = {2013},
  date      = {2013-01-01},
  abstract  = {The Internet is home to an ever increasing array of goods and services available to the general consumer. These products are often discovered through search engines whose focus is on document retrieval rather than product procurement. The demand for details of specific products as opposed to just documents containing such information has resulted in an influx of product collection databases, deal aggregation services, mobile apps, twitter feeds and other just-in-time methods for rapid finding, indexing, and notifying shoppers to sale events. This has led to our development of intelligent Web crawler technology aimed towards this specific category of information retrieval. In this paper, we demonstrate our solution for Web page categorization, segmentation and localization for identifying Web pages with shopping deals and automatically extracting specifics from the identified Web pages. Our work is supported with empirical data of its effectiveness. A screencast demonstration is also available online at http://youtu.be/HHPme6AJuCk.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
The Internet is home to an ever increasing array of goods and services available to the general consumer. These products are often discovered through search engines whose focus is on document retrieval rather than product procurement. The demand for details of specific products as opposed to just documents containing such information has resulted in an influx of product collection databases, deal aggregation services, mobile apps, twitter feeds and other just-in-time methods for rapid finding, indexing, and notifying shoppers to sale events. This has led to our development of intelligent Web crawler technology aimed towards this specific category of information retrieval. In this paper, we demonstrate our solution for Web page categorization, segmentation and localization for identifying Web pages with shopping deals and automatically extracting specifics from the identified Web pages. Our work is supported with empirical data of its effectiveness. A screencast demonstration is also available online at http://youtu.be/HHPme6AJuCk.
Close
Cuzzola, John; Jeremic, Zoran; Bagheri, Ebrahim; Gasevic, Dragan; Jovanovic, Jelena; Bashash, Reza
Semantic Tagging with Linked Open Data Proceedings Article
In: Canadian Semantic Web Symposium, 2013.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/CSWC/CuzzolaBG13,
  author    = {Cuzzola, John and Jeremic, Zoran and Bagheri, Ebrahim and Gasevic, Dragan and Jovanovic, Jelena and Bashash, Reza},
  title     = {Semantic Tagging with Linked Open Data},
  booktitle = {Canadian Semantic Web Symposium},
  year      = {2013},
  date      = {2013-01-01},
  abstract  = {Making sense of text is a challenge for computers particularly with the ambiguity associated with language. Various annotators continue to be developed using a variety of techniques in order to provide context to text. In this paper, we describe Denote - our annotator that uses a structured ontology, machine learning, and statistical analysis to perform tagging and topic discovery. A short screencast for the curious is also available at http://youtu.be/espItTRQVzY as well as demonstration links provided in the conclusion.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Making sense of text is a challenge for computers particularly with the ambiguity associated with language. Various annotators continue to be developed using a variety of techniques in order to provide context to text. In this paper, we describe Denote - our annotator that uses a structured ontology, machine learning, and statistical analysis to perform tagging and topic discovery. A short screencast for the curious is also available at http://youtu.be/espItTRQVzY as well as demonstration links provided in the conclusion.
Close
Cuzzola, John; Jovanovic, Jelena; Bashash, Reza; Bagheri, Ebrahim; Gasevic, Dragan
Method and System of Intelligent Generation of Structured Data and Object Discovery from the Web using Text, Images and Video and other Data Patent
US 61/825995, 2013.
BibTeX | Tags:
@patent{sidebuy,
  title     = {Method and System of Intelligent Generation of Structured Data and Object Discovery from the Web using Text, Images and Video and other Data},
  author    = {John Cuzzola and Jelena Jovanovic and Reza Bashash and Ebrahim Bagheri and Dragan Gasevic},
  year      = {2013},
  date      = {2013-01-01},
  number    = {US 61/825995},
  location  = {US},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {patent}
}

Close
2012
Bagheri, Ebrahim; Ensan, Faezeh; Gasevic, Dragan
Decision support for the software product line domain engineering lifecycle Journal Article
In: Automated Software Engineering, vol. 19, no. 3, pp. 335-377, 2012.
Abstract | BibTeX | Tags:
@article{DBLP:journals/ase/BagheriEG12,
  title     = {Decision support for the software product line domain engineering lifecycle},
  author    = {Ebrahim Bagheri and Faezeh Ensan and Dragan Gasevic},
  year      = {2012},
  date      = {2012-01-01},
  journal   = {Automated Software Engineering},
  volume    = {19},
  number    = {3},
  pages     = {335--377},
  abstract  = {Software product line engineering is a paradigm that advocates the reusability of software engineering assets and the rapid development of new applications for a target domain. These objectives are achieved by capturing the commonalities and variabilities between the applications of the target domain and through the development of comprehensive and variability-covering feature models. The feature models developed within the software product line development process need to cover the relevant features and aspects of the target domain. In other words, the feature models should be elaborate representations of the feature space of that domain. Given that feature models, i.e., software product line feature models, are developed mostly by domain analysts by sifting through domain documentation, corporate records and transcribed interviews, the process is a cumbersome and error-prone one. In this paper, we propose a decision support platform that assists domain analysts throughout the domain engineering lifecycle by: (1) automatically performing natural language processing tasks over domain documents and identifying important information for the domain analysts such as the features and integrity constraints that exist in the domain documents; (2) providing a collaboration platform around the domain documents such that multiple domain analysts can collaborate with each other during the process using a Wiki; (3) formulating semantic links between domain terminology with external widely used ontologies such as WordNet in order to disambiguate the terms used in domain documents; and (4) developing traceability links between the unstructured information available in the domain documents and their formal counterparts within the formal feature model representations.
Results obtained from our controlled experimentations show that the decision support platform is effective in increasing the performance of the domain analysts during the domain engineering lifecycle in terms of both the coverage and accuracy measures.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Software product line engineering is a paradigm that advocates the reusability of software engineering assets and the rapid development of new applications for a target domain. These objectives are achieved by capturing the commonalities and variabilities between the applications of the target domain and through the development of comprehensive and variability-covering feature models. The feature models developed within the software product line development process need to cover the relevant features and aspects of the target domain. In other words, the feature models should be elaborate representations of the feature space of that domain. Given that feature models, i.e., software product line feature models, are developed mostly by domain analysts by sifting through domain documentation, corporate records and transcribed interviews, the process is a cumbersome and error-prone one. In this paper, we propose a decision support platform that assists domain analysts throughout the domain engineering lifecycle by: (1) automatically performing natural language processing tasks over domain documents and identifying important information for the domain analysts such as the features and integrity constraints that exist in the domain documents; (2) providing a collaboration platform around the domain documents such that multiple domain analysts can collaborate with each other during the process using a Wiki; (3) formulating semantic links between domain terminology with external widely used ontologies such as WordNet in order to disambiguate the terms used in domain documents; and (4) developing traceability links between the unstructured information available in the domain documents and their formal counterparts within the formal feature model representations. 
Results obtained from our controlled experimentations show that the decision support platform is effective in increasing the performance of the domain analysts during the domain engineering lifecycle in terms of both the coverage and accuracy measures.
Close
Asadi, Mohsen; Mohabbati, Bardia; Gasevic, Dragan; Bagheri, Ebrahim; Hatala, Marek
Developing Semantically-Enabled Families of Method-Oriented Architectures Journal Article
In: IJISMD, vol. 3, no. 4, pp. 1-26, 2012.
BibTeX | Tags:
@article{DBLP:journals/ijismd/AsadiMGBH12,
  title     = {Developing Semantically-Enabled Families of Method-Oriented Architectures},
  author    = {Mohsen Asadi and Bardia Mohabbati and Dragan Gasevic and Ebrahim Bagheri and Marek Hatala},
  year      = {2012},
  date      = {2012-01-01},
  journal   = {International Journal of Information System Modeling and Design},
  volume    = {3},
  number    = {4},
  pages     = {1--26},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Bagheri, Ebrahim; Gasevic, Dragan
Foreword to the special issue on quality engineering for software product lines Journal Article
In: Software Quality Journal, vol. 20, no. 3-4, pp. 421-424, 2012.
Abstract | BibTeX | Tags:
@article{DBLP:journals/sqj/BagheriG12,
  title     = {Foreword to the special issue on quality engineering for software product lines},
  author    = {Ebrahim Bagheri and Dragan Gasevic},
  year      = {2012},
  date      = {2012-01-01},
  journal   = {Software Quality Journal},
  volume    = {20},
  number    = {3-4},
  pages     = {421--424},
  abstract  = {This current special issue on Quality Engineering for Software Product Lines was produced with the purpose of depicting the state of the art and practice. We received many very high-quality submissions from leading researchers in the area and finally accepted seven papers for inclusion in the special issue. Each paper was peer-reviewed by at least three expert reviewers to ensure that the highest quality standards were met. We believe that this special issue contains some of the leading works in quality engineering for software product lines and can serve as a foundation for further advancing effort in this area.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
This current special issue on Quality Engineering for Software Product Lines was produced with the purpose of depicting the state of the art and practice. We received many very high-quality submissions from leading researchers in the area and finally accepted seven papers for inclusion in the special issue. Each paper was peer-reviewed by at least three expert reviewers to ensure that the highest quality standards were met. We believe that this special issue contains some of the leading works in quality engineering for software product lines and can serve as a foundation for further advancing effort in this area.
Close
Bagheri, Ebrahim; Di Noia, Tommaso; Gasevic, Dragan; Ragone, Azzurra
Formalizing interactive staged feature model configuration Journal Article
In: Journal of Software: Evolution and Process, vol. 24, no. 4, pp. 375-400, 2012.
Abstract | BibTeX | Tags:
@article{DBLP:journals/smr/BagheriNGR12,
  title     = {Formalizing interactive staged feature model configuration},
  author    = {Ebrahim Bagheri and Di Noia, Tommaso and Dragan Gasevic and Azzurra Ragone},
  year      = {2012},
  date      = {2012-01-01},
  journal   = {Journal of Software: Evolution and Process},
  volume    = {24},
  number    = {4},
  pages     = {375--400},
  abstract  = {Feature modeling is an attractive technique for capturing commonality as well as variability within an application domain for generative programming and software product line engineering. Feature models symbolize an overarching representation of the possible application configuration space, and can hence be customized based on specific domain requirements and stakeholder goals. Most interactive or semi-automated feature model customization processes neglect the need to have a holistic approach towards the integration and satisfaction of the stakeholder's soft and hard constraints, and the application-domain integrity constraints. In this paper, we will show how the structure and constraints of a feature model can be modeled uniformly through Propositional Logic extended with concrete domains, called $\mathcal{P}(\mathcal{N})$. Furthermore, we formalize the representation of soft constraints in fuzzy $\mathcal{P}(\mathcal{N})$ and explain how semi-automated feature model customization is performed in this setting. The model configuration derivation process that we propose respects the soundness and completeness properties.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Feature modeling is an attractive technique for capturing commonality as well as variability within an application domain for generative programming and software product line engineering. Feature models symbolize an overarching representation of the possible application configuration space, and can hence be customized based on specific domain requirements and stakeholder goals. Most interactive or semi-automated feature model customization processes neglect the need to have a holistic approach towards the integration and satisfaction of the stakeholder's soft and hard constraints, and the application-domain integrity constraints. In this paper, we will show how the structure and constraints of a feature model can be modeled uniformly through Propositional Logic extended with concrete domains, called $\mathcal{P}(\mathcal{N})$. Furthermore, we formalize the representation of soft constraints in fuzzy $\mathcal{P}(\mathcal{N})$ and explain how semi-automated feature model customization is performed in this setting. The model configuration derivation process that we propose respects the soundness and completeness properties.
Close
Bagheri, Ebrahim; Gasevic, Dragan; Pan, Jeff Z
Guest Editorial Foreword to the Special Issue on Semantics-Enabled Software Engineering Journal Article
In: IEEE Transactions on Systems, Man, and Cybernetics, Part C, vol. 42, no. 1, pp. 1-2, 2012.
Abstract | BibTeX | Tags:
@article{DBLP:journals/tsmc/BagheriGP12,
  title     = {Guest Editorial Foreword to the Special Issue on Semantics-Enabled Software Engineering},
  author    = {Ebrahim Bagheri and Dragan Gasevic and Jeff Z Pan},
  year      = {2012},
  date      = {2012-01-01},
  journal   = {IEEE Transactions on Systems, Man, and Cybernetics, Part C},
  volume    = {42},
  number    = {1},
  pages     = {1--2},
  abstract  = {The six papers in this special issue depict the state of the art and practice of the impact of semantic technologies in the field of Software Engineering.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
The six papers in this special issue depict the state of the art and practice of the impact of semantic technologies in the field of Software Engineering.
Close
Ognjanovic, Ivana; Mohabbati, Bardia; Gasevic, Dragan; Bagheri, Ebrahim; Boskovic, Marko
A Metaheuristic Approach for the Configuration of Business Process Families Proceedings Article
In: IEEE SCC, pp. 25-32, 2012.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/IEEEscc/OgnjanovicMGBB12,
  title     = {A Metaheuristic Approach for the Configuration of Business Process Families},
  author    = {Ivana Ognjanovic and Bardia Mohabbati and Dragan Gasevic and Ebrahim Bagheri and Marko Boskovic},
  year      = {2012},
  date      = {2012-01-01},
  booktitle = {IEEE SCC},
  pages     = {25--32},
  crossref  = {DBLP:conf/IEEEscc/2012},
  abstract  = {Business process families provide an over-arching representation of the possible business processes of a target domain. They are defined by capturing the similarities and differences among the possible business processes of the target domain. To realize a business process family into a concrete business process model, the variability points of the business process family need to be bounded. The decision on how to bind these variation points boils down to the stakeholders requirements and needs. Given specific requirements from the stakeholders, the business process family can be configured. This paper formally introduces and empirically evaluates a framework called ConfBPFM that utilizes standard techniques for identifying stakeholders quality requirements and employs a metaheuristic search algorithm (i.e., Genetic Algorithms) to optimally configure a business process family.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Business process families provide an over-arching representation of the possible business processes of a target domain. They are defined by capturing the similarities and differences among the possible business processes of the target domain. To realize a business process family into a concrete business process model, the variability points of the business process family need to be bounded. The decision on how to bind these variation points boils down to the stakeholders requirements and needs. Given specific requirements from the stakeholders, the business process family can be configured. This paper formally introduces and empirically evaluates a framework called ConfBPFM that utilizes standard techniques for identifying stakeholders quality requirements and employs a metaheuristic search algorithm (i.e., Genetic Algorithms) to optimally configure a business process family.
Close
Soltani, Samaneh; Asadi, Mohsen; Gasevic, Dragan; Hatala, Marek; Bagheri, Ebrahim
Automated planning for feature model configuration based on functional and non-functional requirements Proceedings Article
In: SPLC (1), pp. 56-65, 2012.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/splc/SoltaniAGHB12,
  title     = {Automated planning for feature model configuration based on functional and non-functional requirements},
  author    = {Samaneh Soltani and Mohsen Asadi and Dragan Gasevic and Marek Hatala and Ebrahim Bagheri},
  year      = {2012},
  date      = {2012-01-01},
  booktitle = {SPLC (1)},
  pages     = {56--65},
  crossref  = {DBLP:conf/splc/2012-1},
  abstract  = {Feature modeling is one of the main techniques used in Software Product Line Engineering to manage the variability within the products of a family. Concrete products of the family can be generated through a configuration process. The configuration process selects and/or removes features from the feature model according to the stakeholders requirements. Selecting the right set of features for one product from amongst all of the available features in the feature model is a complex task because: 1) the multiplicity of stakeholders functional requirements; 2) the positive or negative impact of features on non-functional properties; and 3) the stakeholders preferences w.r.t. the desirable non-functional properties of the final product. Many configurations techniques have already been proposed to facilitate automated product derivation. However, most of the current proposals are not designed to consider stakeholders preferences and constraints especially with regard to non-functional properties. We address the software product line configuration problem and propose a framework, which employs an artificial intelligence planning technique to automatically select suitable features that satisfy both the stakeholders functional and non-functional preferences and constraints. We also provide tooling support to facilitate the use of our framework. Our experiments show that despite the complexity involved with the simultaneous consideration of both functional and non-functional properties our configuration technique is scalable.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Feature modeling is one of the main techniques used in Software Product Line Engineering to manage the variability within the products of a family. Concrete products of the family can be generated through a configuration process. The configuration process selects and/or removes features from the feature model according to the stakeholders requirements. Selecting the right set of features for one product from amongst all of the available features in the feature model is a complex task because: 1) the multiplicity of stakeholders functional requirements; 2) the positive or negative impact of features on non-functional properties; and 3) the stakeholders preferences w.r.t. the desirable non-functional properties of the final product. Many configurations techniques have already been proposed to facilitate automated product derivation. However, most of the current proposals are not designed to consider stakeholders preferences and constraints especially with regard to non-functional properties. We address the software product line configuration problem and propose a framework, which employs an artificial intelligence planning technique to automatically select suitable features that satisfy both the stakeholders functional and non-functional preferences and constraints. We also provide tooling support to facilitate the use of our framework. Our experiments show that despite the complexity involved with the simultaneous consideration of both functional and non-functional properties our configuration technique is scalable.
Close
Ensan, Faezeh; Bagheri, Ebrahim; Gasevic, Dragan
Evolutionary Search-Based Test Generation for Software Product Line Feature Models Proceedings Article
In: CAiSE, pp. 613-628, 2012.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/caise/EnsanBG12,
  title     = {Evolutionary Search-Based Test Generation for Software Product Line Feature Models},
  author    = {Faezeh Ensan and Ebrahim Bagheri and Dragan Gasevic},
  year      = {2012},
  date      = {2012-01-01},
  booktitle = {CAiSE},
  pages     = {613--628},
  crossref  = {DBLP:conf/caise/2012},
  abstract  = {Product line-based software engineering is a paradigm that models the commonalities and variabilities of different applications of a given domain of interest within a unique framework and enhances rapid and low cost development of new applications based on reuse engineering principles. Despite the numerous advantages of software product lines, it is quite challenging to comprehensively test them. This is due to the fact that a product line can potentially represent many different applications; therefore, testing a single product line requires the test of its various applications. Theoretically, a product line with n software features can be a source for the development of 2^n application. This requires the test of 2^n applications if a brute-force comprehensive testing strategy is adopted. In this paper, we propose an evolutionary testing approach based on Genetic Algorithms to explore the configuration space of a software product line feature model in order to automatically generate test suites. We will show through the use of several publicly-available product line feature models that the proposed approach is able to generate test suites of O(n) size complexity as opposed to O(2^n) while at the same time form a suitable tradeoff balance between error coverage and feature coverage in its generated test suites.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Product line-based software engineering is a paradigm that models the commonalities and variabilities of different applications of a given domain of interest within a unique framework and enhances rapid and low cost development of new applications based on reuse engineering principles. Despite the numerous advantages of software product lines, it is quite challenging to comprehensively test them. This is due to the fact that a product line can potentially represent many different applications; therefore, testing a single product line requires the test of its various applications. Theoretically, a product line with n software features can be a source for the development of 2^n application. This requires the test of 2^n applications if a brute-force comprehensive testing strategy is adopted. In this paper, we propose an evolutionary testing approach based on Genetic Algorithms to explore the configuration space of a software product line feature model in order to automatically generate test suites. We will show through the use of several publicly-available product line feature models that the proposed approach is able to generate test suites of O(n) size complexity as opposed to O(2^n) while at the same time form a suitable tradeoff balance between error coverage and feature coverage in its generated test suites.
Close
Bagheri, Ebrahim; Ensan, Faezeh; Gasevic, Dragan
Grammar-based Test Generation for Software Product Line Feature Models Proceedings Article
In: Proceedings of the 2012 Conference of the Centre for Advanced Studies on Collaborative Research (CASCON 2012), pp. 87–101, 2012.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/cascon/BagheriEG12,
  title     = {Grammar-based Test Generation for Software Product Line Feature Models},
  author    = {Ebrahim Bagheri and Faezeh Ensan and Dragan Gasevic},
  year      = {2012},
  date      = {2012-01-01},
  booktitle = {Proceedings of the 2012 Conference of the Centre for Advanced Studies on Collaborative Research (CASCON 2012)},
  pages     = {87--101},
  abstract  = {Product lines are often employed for the facilitation of software re-use, rapid application development and increase in productivity. Despite the numerous advantages of software product lines, the task of testing them is a cumbersome process due to the fact that the number of applications that need to be tested is exponential to the number of features represented in the product line. In this paper, we attempt to reduce the number of required tests for testing a software product line while at the same time preserving an acceptable fault coverage. For this purpose, we introduce eight coverage criteria based on the transformation of software product line feature models into formal context-free grammars. The theoretical foundation for the proposed coverage criteria is based on the development of equivalence partitions on the software product line configuration space and the use of boundary value analysis for test suite generation. We have performed experiments on several SPLOT feature models, the results of which show that the test suite generation strategies based on the proposed coverage criteria are effective in significantly reducing the number of required tests and at the same time maintaining a high fault coverage ratio.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Product lines are often employed for the facilitation of software re-use, rapid application development and increase in productivity. Despite the numerous advantages of software product lines, the task of testing them is a cumbersome process due to the fact that the number of applications that need to be tested is exponential to the number of features represented in the product line. In this paper, we attempt to reduce the number of required tests for testing a software product line while at the same time preserving an acceptable fault coverage. For this purpose, we introduce eight coverage criteria based on the transformation of software product line feature models into formal context-free grammars. The theoretical foundation for the proposed coverage criteria is based on the development of equivalence partitions on the software product line configuration space and the use of boundary value analysis for test suite generation. We have performed experiments on several SPLOT feature models, the results of which show that the test suite generation strategies based on the proposed coverage criteria are effective in significantly reducing the number of required tests and at the same time maintaining a high fault coverage ratio.
Close
Noorian, Mahdi; Bagheri, Ebrahim; Du, Weichang
Non-functional Properties in Software Product Lines: A Taxonomy for Classification Proceedings Article
In: SEKE, pp. 663-667, 2012.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/seke/NoorianBD12,
title = {Non-functional Properties in Software Product Lines: A Taxonomy for Classification},
author = {Mahdi Noorian and Ebrahim Bagheri and Weichang Du},
year = {2012},
date = {2012-01-01},
booktitle = {SEKE},
pages = {663--667},
crossref = {DBLP:conf/seke/2012},
abstract = {In the recent years, the software product lines paradigm has gained interest in both industry and academia. As in traditional software development, the concept of quality is crucial for the success of software product line practices and both functional and nonfunctional characteristics must be involved in the development process in order to achieve a high quality software product line. Therefore, many efforts have been made towards the development of quality-based approaches in order to address non-functional properties in software product line development. In this paper, we propose a taxonomy that characterizes and classifies various approaches for employing non-functional properties in software product lines development. The taxonomy not only highlights the major concerns that need to be addressed in the area of quality-based software product lines, but also helps to identify various research gaps that need to be filled in future work in this area.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
In the recent years, the software product lines paradigm has gained interest in both industry and academia. As in traditional software development, the concept of quality is crucial for the success of software product line practices and both functional and nonfunctional characteristics must be involved in the development process in order to achieve a high quality software product line. Therefore, many efforts have been made towards the development of quality-based approaches in order to address non-functional properties in software product line development. In this paper, we propose a taxonomy that characterizes and classifies various approaches for employing non-functional properties in software product lines development. The taxonomy not only highlights the major concerns that need to be addressed in the area of quality-based software product lines, but also helps to identify various research gaps that need to be filled in future work in this area.
Close
Asadi, Mohsen; Bagheri, Ebrahim; Mohabbati, Bardia; Gasevic, Dragan
Requirements engineering in feature oriented software product lines: an initial analytical study Proceedings Article
In: SPLC (2), pp. 36-44, 2012.
BibTeX | Tags:
@inproceedings{DBLP:conf/splc/AsadiBMG12,
title = {Requirements engineering in feature oriented software product lines: an initial analytical study},
author = {Mohsen Asadi and Ebrahim Bagheri and Bardia Mohabbati and Dragan Gasevic},
year = {2012},
date = {2012-01-01},
booktitle = {SPLC (2)},
pages = {36--44},
crossref = {DBLP:conf/splc/2012-2},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
2011
Bagheri, Ebrahim; Gasevic, Dragan
Assessing the maintainability of software product line feature models using structural metrics Journal Article
In: Software Quality Journal, vol. 19, no. 3, pp. 579-612, 2011.
Abstract | BibTeX | Tags:
@article{DBLP:journals/sqj/BagheriG11,
title = {Assessing the maintainability of software product line feature models using structural metrics},
author = {Ebrahim Bagheri and Dragan Gasevic},
year = {2011},
date = {2011-01-01},
journal = {Software Quality Journal},
volume = {19},
number = {3},
pages = {579--612},
abstract = {A software product line is a unified representation of a set of conceptually similar software systems that share many common features and satisfy the requirements of a particular domain. Within the context of software product lines, feature models are tree-like structures that are widely used for modeling and representing the inherent commonality and variability of software product lines. Given the fact that many different software systems can be spawned from a single software product line, it can be anticipated that a low-quality design can ripple through to many spawned software systems. Therefore, the need for early indicators of external quality attributes is recognized in order to avoid the implications of defective and low-quality design during the late stages of production. In this paper, we propose a set of structural metrics for software product line feature models and theoretically validate them using valid measurement-theoretic principles. Further, we investigate through controlled experimentation whether these structural metrics can be good predictors (early indicators) of the three main subcharacteristics of maintainability: analyzability, changeability, and understandability. More specifically, a four-step analysis is conducted: (1) investigating whether feature model structural metrics are correlated with feature model maintainability through the employment of classical statistical correlation techniques; (2) understanding how well each of the structural metrics can serve as discriminatory references for maintainability; (3) identifying the sufficient set of structural metrics for evaluating each of the subcharacteristics of maintainability; and (4) evaluating how well different prediction models based on the proposed structural metrics can perform in indicating the maintainability of a feature model. 
Results obtained from the controlled experiment support the idea that useful prediction models can be built for the purpose of evaluating feature model maintainability using early structural metrics. Some of the structural metrics show significant correlation with the subjective perception of the subjects about the maintainability of the feature models.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
A software product line is a unified representation of a set of conceptually similar software systems that share many common features and satisfy the requirements of a particular domain. Within the context of software product lines, feature models are tree-like structures that are widely used for modeling and representing the inherent commonality and variability of software product lines. Given the fact that many different software systems can be spawned from a single software product line, it can be anticipated that a low-quality design can ripple through to many spawned software systems. Therefore, the need for early indicators of external quality attributes is recognized in order to avoid the implications of defective and low-quality design during the late stages of production. In this paper, we propose a set of structural metrics for software product line feature models and theoretically validate them using valid measurement-theoretic principles. Further, we investigate through controlled experimentation whether these structural metrics can be good predictors (early indicators) of the three main subcharacteristics of maintainability: analyzability, changeability, and understandability. More specifically, a four-step analysis is conducted: (1) investigating whether feature model structural metrics are correlated with feature model maintainability through the employment of classical statistical correlation techniques; (2) understanding how well each of the structural metrics can serve as discriminatory references for maintainability; (3) identifying the sufficient set of structural metrics for evaluating each of the subcharacteristics of maintainability; and (4) evaluating how well different prediction models based on the proposed structural metrics can perform in indicating the maintainability of a feature model. 
Results obtained from the controlled experiment support the idea that useful prediction models can be built for the purpose of evaluating feature model maintainability using early structural metrics. Some of the structural metrics show significant correlation with the subjective perception of the subjects about the maintainability of the feature models.
Close
Boskovic, Marko; Gasevic, Dragan; Mohabbati, Bardia; Asadi, Mohsen; Hatala, Marek; Kaviani, Nima; Rusk, Jeffrey J; Bagheri, Ebrahim
Developing Families of Software Services: A Semantic Web Approach Journal Article
In: Journal of Research and Practice in Information Technology, vol. 43, no. 3, pp. 179-208, 2011.
Abstract | BibTeX | Tags:
@article{DBLP:journals/acj/BoskovicGMAHKR11,
title = {Developing Families of Software Services: A Semantic Web Approach},
author = {Marko Boskovic and Dragan Gasevic and Bardia Mohabbati and Mohsen Asadi and Marek Hatala and Nima Kaviani and Jeffrey J Rusk and Ebrahim Bagheri},
year = {2011},
date = {2011-01-01},
journal = {Journal of Research and Practice in Information Technology},
volume = {43},
number = {3},
pages = {179--208},
abstract = {Current experience in the development of service-oriented systems indicates that tasks such as discovery, systematic reuse, and appropriate composition of services are difficult and error prone. For addressing these issues, the application of Software Product Line Engineering (SPLE) appears to be a promising approach. SPLE promotes systematic reuse in the development of systems with similar requirements. SPLE promotes reuse by development of comprehensive sets of systems (aka software families) as a whole (domain engineering), and later configuring them according to the functionality requested by the stakeholders (application engineering). Furthermore, the reuse of services that are part of a software product line is enhanced, since each time a member of SPL is instantiated and deployed, services that are used in the instantiated member are reused. The research community have recognized and proposed several promising solutions to the development of service-oriented systems based on the SPLE principles; however, there have been little results that report on concrete tools that automate and thus, reduce the amount of efforts needed for completing specific tasks of a development process. In this paper, we first introduce a process for developing service-oriented families. Then, we investigate the use of Semantic Web technologies and ontologies to support service discovery, context description, and verification in domain engineering; and staged configuration and service composition generation in application engineering of the introduced development process. We evaluate the effectiveness of our Semantic Web-based proposals in terms of optimizing the amount of efforts necessary for completing relevant tasks in the discussed development process.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Current experience in the development of service-oriented systems indicates that tasks such as discovery, systematic reuse, and appropriate composition of services are difficult and error prone. For addressing these issues, the application of Software Product Line Engineering (SPLE) appears to be a promising approach. SPLE promotes systematic reuse in the development of systems with similar requirements. SPLE promotes reuse by development of comprehensive sets of systems (aka software families) as a whole (domain engineering), and later configuring them according to the functionality requested by the stakeholders (application engineering). Furthermore, the reuse of services that are part of a software product line is enhanced, since each time a member of SPL is instantiated and deployed, services that are used in the instantiated member are reused. The research community have recognized and proposed several promising solutions to the development of service-oriented systems based on the SPLE principles; however, there have been little results that report on concrete tools that automate and thus, reduce the amount of efforts needed for completing specific tasks of a development process. In this paper, we first introduce a process for developing service-oriented families. Then, we investigate the use of Semantic Web technologies and ontologies to support service discovery, context description, and verification in domain engineering; and staged configuration and service composition generation in application engineering of the introduced development process. We evaluate the effectiveness of our Semantic Web-based proposals in terms of optimizing the amount of efforts necessary for completing relevant tasks in the discussed development process.
Close
Bagheri, Ebrahim; Ensan, Faezeh; Gasevic, Dragan; Boskovic, Marko
Modular Feature Models: Representation and Configuration Journal Article
In: Journal of Research and Practice in Information Technology, vol. 43, no. 2, pp. 109-140, 2011.
Abstract | BibTeX | Tags:
@article{DBLP:journals/acj/BagheriEGB11,
title = {Modular Feature Models: Representation and Configuration},
author = {Ebrahim Bagheri and Faezeh Ensan and Dragan Gasevic and Marko Boskovic},
year = {2011},
date = {2011-01-01},
journal = {Journal of Research and Practice in Information Technology},
volume = {43},
number = {2},
pages = {109--140},
abstract = {Within the realm of software product line engineering, feature modeling is one of the widely used techniques for modeling commonality as well as variability. Feature models incorporate the entire domain application configuration space, and are therefore developed collectively by teams of domain experts. In large scale industrial domains, feature models become too complex both in terms of maintenance and configuration. In order to make the maintenance and configuration of feature models feasible, we propose to modularize feature models based on the well-established Distributed Description Logics formalism. Modular feature models provide for an enhanced collaborative/ distributed feature model design, more efficient feature model evolution and better reusability of feature model structure. We also develop methods for the configuration and configuration verification of a modular feature model based on standard inference mechanisms. We describe and evaluate our proposed approach through a case study on an online electronic store application domain.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
Within the realm of software product line engineering, feature modeling is one of the widely used techniques for modeling commonality as well as variability. Feature models incorporate the entire domain application configuration space, and are therefore developed collectively by teams of domain experts. In large scale industrial domains, feature models become too complex both in terms of maintenance and configuration. In order to make the maintenance and configuration of feature models feasible, we propose to modularize feature models based on the well-established Distributed Description Logics formalism. Modular feature models provide for an enhanced collaborative/ distributed feature model design, more efficient feature model evolution and better reusability of feature model structure. We also develop methods for the configuration and configuration verification of a modular feature model based on standard inference mechanisms. We describe and evaluate our proposed approach through a case study on an online electronic store application domain.
Close
Mohabbati, Bardia; Gasevic, Dragan; Hatala, Marek; Asadi, Mohsen; Bagheri, Ebrahim; Boskovic, Marko
A Quality Aggregation Model for Service-Oriented Software Product Lines Based on Variability and Composition Patterns Proceedings Article
In: ICSOC, pp. 436-451, 2011.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/icsoc/MohabbatiGHABB11,
title = {A Quality Aggregation Model for Service-Oriented Software Product Lines Based on Variability and Composition Patterns},
author = {Bardia Mohabbati and Dragan Gasevic and Marek Hatala and Mohsen Asadi and Ebrahim Bagheri and Marko Boskovic},
year = {2011},
date = {2011-01-01},
booktitle = {ICSOC},
pages = {436--451},
crossref = {DBLP:conf/icsoc/2011},
abstract = {Quality evaluation is a challenging task in monolithic software systems, and is even more complex when it comes to Service-Oriented Software Product Lines (SOSPL), as it needs to analyze the attributes of a family of SOA systems. In SOSPL, variability can be managed and planned at the architectural level to develop a software product with the same set of functionalities but different degrees of non-functional quality attribute satisfaction. Therefore, architectural quality evaluation becomes crucial due to the fact that it allows for the examination of whether or not the final product satisfies and guarantees all the ranges of quality requirements within the envisioned scope. This paper addresses the open research problem of aggregating QoS attribute ranges with respect to architectural variability. Previous solutions for quality aggregation do not consider architectural variability for composite services. Our approach introduces variability patterns that can possibly occur at the architectural level of a SOSPL. We propose an aggregation model for QoS computation which takes both variability and composition patterns into account.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Quality evaluation is a challenging task in monolithic software systems, and is even more complex when it comes to Service-Oriented Software Product Lines (SOSPL), as it needs to analyze the attributes of a family of SOA systems. In SOSPL, variability can be managed and planned at the architectural level to develop a software product with the same set of functionalities but different degrees of non-functional quality attribute satisfaction. Therefore, architectural quality evaluation becomes crucial due to the fact that it allows for the examination of whether or not the final product satisfies and guarantees all the ranges of quality requirements within the envisioned scope. This paper addresses the open research problem of aggregating QoS attribute ranges with respect to architectural variability. Previous solutions for quality aggregation do not consider architectural variability for composite services. Our approach introduces variability patterns that can possibly occur at the architectural level of a SOSPL. We propose an aggregation model for QoS computation which takes both variability and composition patterns into account.
Close
Soltani, Samaneh; Asadi, Mohsen; Hatala, Marek; Gasevic, Dragan; Bagheri, Ebrahim
Automated planning for feature model configuration based on stakeholders' business concerns Proceedings Article
In: ASE, pp. 536-539, 2011.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/kbse/SoltaniAHGB11,
title = {Automated planning for feature model configuration based on stakeholders' business concerns},
author = {Samaneh Soltani and Mohsen Asadi and Marek Hatala and Dragan Gasevic and Ebrahim Bagheri},
year = {2011},
date = {2011-01-01},
booktitle = {ASE},
pages = {536--539},
crossref = {DBLP:conf/kbse/2011},
abstract = {Feature modeling is the main mechanism used in Software Product Line Engineering to manage the differences between the products of a family. Concrete products of the family can be generated through a configuration process over a feature model. The configuration process selects and/or removes features from the feature model according to the stakeholders' requirements. Selecting the right set of features for one product from all the available features in the feature model is a cumbersome task because 1) the stakeholders have functional requirements; 2) features may have negative and positive contributions on different business concerns; and 3) the stakeholders may have diverse business concerns and limited resources that they can spend on a product. Many configurations techniques have been proposed to facilitate software developers' tasks through automated product derivation. However, most of the current proposals for automatic configuration are not devised to cope with business oriented requirements and stakeholders' resource limitations. We address the software product line configuration problem and propose a framework, which employs an artificial intelligence planning technique to automatically select suitable features that satisfy the stakeholders' business concerns and resource limitations. We also provide tooling and methodology support to facilitate the use of our framework.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Feature modeling is the main mechanism used in Software Product Line Engineering to manage the differences between the products of a family. Concrete products of the family can be generated through a configuration process over a feature model. The configuration process selects and/or removes features from the feature model according to the stakeholders' requirements. Selecting the right set of features for one product from all the available features in the feature model is a cumbersome task because 1) the stakeholders have functional requirements; 2) features may have negative and positive contributions on different business concerns; and 3) the stakeholders may have diverse business concerns and limited resources that they can spend on a product. Many configurations techniques have been proposed to facilitate software developers' tasks through automated product derivation. However, most of the current proposals for automatic configuration are not devised to cope with business oriented requirements and stakeholders' resource limitations. We address the software product line configuration problem and propose a framework, which employs an artificial intelligence planning technique to automatically select suitable features that satisfy the stakeholders' business concerns and resource limitations. We also provide tooling and methodology support to facilitate the use of our framework.
Close
Bagheri, Ebrahim; Asadi, Mohsen; Ensan, Faezeh; Gasevic, Dragan; Mohabbati, Bardia
Bringing Semantics to Feature Models with SAFMDL Proceedings Article
In: Proceedings of the 2011 Conference of the Centre for Advanced Studies on Collaborative Research (CASCON 2011), pp. 287-300, 2011.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/cascon/BagheriAEGM11,
title = {Bringing Semantics to Feature Models with {SAFMDL}},
author = {Ebrahim Bagheri and Mohsen Asadi and Faezeh Ensan and Dragan Gasevic and Bardia Mohabbati},
year = {2011},
date = {2011-01-01},
booktitle = {Proceedings of the 2011 Conference of the Centre for Advanced Studies on Collaborative Research (CASCON 2011)},
pages = {287--300},
abstract = {Software product line engineering is a paradigm that advocates the reusability of software engineering assets and the rapid development of new applications for a target domain. These objectives are achieved by capturing the commonalities and variabilities between the applications of a target domain and through the development of comprehensive and variability-covering domain models. The domain models developed within the software product line development process need to cover all of the possible features and aspects of the target domain. In other words, the domain models often described using feature models should be elaborate representations of the feature space of that domain. In order to operationalize feature-based representations of a software application, appropriate implementation mechanisms need to be employed. In this paper, we propose a Semantic Web-oriented language, called Semantic Annotations for Feature Modeling Description Language (SAFMDL) that provides the means to semantically describe feature models. We will show that using SAFMDL along with Semantic Web Query techniques, we are able to bridge the gap between software product lines and SOA technology. Our proposed work allows software practitioners to use Semantic Web technology to quickly and rapidly develop new software products based on SOA technology from software product lines.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Software product line engineering is a paradigm that advocates the reusability of software engineering assets and the rapid development of new applications for a target domain. These objectives are achieved by capturing the commonalities and variabilities between the applications of a target domain and through the development of comprehensive and variability-covering domain models. The domain models developed within the software product line development process need to cover all of the possible features and aspects of the target domain. In other words, the domain models often described using feature models should be elaborate representations of the feature space of that domain. In order to operationalize feature-based representations of a software application, appropriate implementation mechanisms need to be employed. In this paper, we propose a Semantic Web-oriented language, called Semantic Annotations for Feature Modeling Description Language (SAFMDL) that provides the means to semantically describe feature models. We will show that using SAFMDL along with Semantic Web Query techniques, we are able to bridge the gap between software product lines and SOA technology. Our proposed work allows software practitioners to use Semantic Web technology to quickly and rapidly develop new software products based on SOA technology from software product lines.
Close
Ognjanovic, Ivana; Gasevic, Dragan; Bagheri, Ebrahim; Asadi, Mohsen
Conditional preferences in software stakeholders' judgments Proceedings Article
In: SAC, pp. 683-690, 2011.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/sac/OgnjanovicGBA11,
title = {Conditional preferences in software stakeholders' judgments},
author = {Ivana Ognjanovic and Dragan Gasevic and Ebrahim Bagheri and Mohsen Asadi},
year = {2011},
date = {2011-01-01},
booktitle = {SAC},
pages = {683--690},
crossref = {DBLP:conf/sac/2011},
abstract = {In reality, many of the stakeholders' decisions about their desirable requirements can be dependent on other internal or external factors. Such dependencies entail conditionality between the requirements that have been defined, e.g., a requirement is desirable for the stakeholders only if a certain condition is met or some other requirements are excluded. In this paper, we propose a novel framework that tackles the challenge of capturing and processing software stakeholders' conditional preferences. Our proposal extends the Stratified Analytic Hierarchical Process (S-AHP) method that we have previously introduced. S-AHP is built on top of the Analytic Hierarchical Process method, which performs a pair-wise comparison of stakeholders' preferences. The current main framework for handling conditionality is TCP-nets, which suffers from the inability to handle hierarchical structure of comparisons and cycles in dependencies defined by the conditional requirements. Also, TCP-nets is primarily developed for qualitative preferences and its quantitative extensions cannot completely capture quantitative relative importance. We show that our framework is able to address these shortcomings of TCP-nets while preserving many of its advantages.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
In reality, many of the stakeholders' decisions about their desirable requirements can be dependent on other internal or external factors. Such dependencies entail conditionality between the requirements that have been defined, e.g., a requirement is desirable for the stakeholders only if a certain condition is met or some other requirements are excluded. In this paper, we propose a novel framework that tackles the challenge of capturing and processing software stakeholders' conditional preferences. Our proposal extends the Stratified Analytic Hierarchical Process (S-AHP) method that we have previously introduced. S-AHP is built on top of the Analytic Hierarchical Process method, which performs a pair-wise comparison of stakeholders' preferences. The current main framework for handling conditionality is TCP-nets, which suffers from the inability to handle hierarchical structure of comparisons and cycles in dependencies defined by the conditional requirements. Also, TCP-nets is primarily developed for qualitative preferences and its quantitative extensions cannot completely capture quantitative relative importance. We show that our framework is able to address these shortcomings of TCP-nets while preserving many of its advantages.
Close
Bagheri, Ebrahim; Ensan, Faezeh
Consolidating multiple requirement specifications through argumentation Proceedings Article
In: SAC, pp. 659-666, 2011.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/sac/BagheriE11,
  title     = {Consolidating multiple requirement specifications through argumentation},
  author    = {Bagheri, Ebrahim and Ensan, Faezeh},
  year      = {2011},
  date      = {2011-01-01},
  booktitle = {SAC},
  pages     = {659--666},
  crossref  = {DBLP:conf/sac/2011},
  abstract  = {The process of handling inconsistencies in software requirements is an important and challenging task. Most often in cases where multiple stakeholders interact with the requirement analysts, inconsistent, discrepant and conflicting information is gathered that needs to be understood and interpreted properly. Recent research has suggested that despite the fact that inconsistencies are not desirable by nature, they can be tolerated in order to better understand the nature of the problem domain and the stakeholders' line of thought. With this in mind, we propose an argumentative approach towards handling inconsistent requirement specifications. In our semi-formal approach, we build on Dung's abstract argumentation framework and represent requirement statements as \emph{arguments}. This way we are able to model the interaction of the requirement statements in terms of their inconsistencies and also provide a decision support process for the resolution of inconsistencies. We discuss our approach in detail through a widely used case study and introduce our Eclipse plugin tool supporting the proposed work.},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
The process of handling inconsistencies in software requirements is an important and challenging task. Most often in cases where multiple stakeholders interact with the requirement analysts, inconsistent, discrepant and conflicting information is gathered that needs to be understood and interpreted properly. Recent research has suggested that despite the fact that inconsistencies are not desirable by nature, they can be tolerated in order to better understand the nature of the problem domain and the stakeholders' line of thought. With this in mind, we propose an argumentative approach towards handling inconsistent requirement specifications. In our semi-formal approach, we build on Dung's abstract argumentation framework and represent requirement statements as arguments. This way we are able to model the interaction of the requirement statements in terms of their inconsistencies and also provide a decision support process for the resolution of inconsistencies. We discuss our approach in detail through a widely used case study and introduce our Eclipse plugin tool supporting the proposed work.
Close
Asadi, Mohsen; Mohabbati, Bardia; Gasevic, Dragan; Bagheri, Ebrahim
Developing Families of Method-Oriented Architecture Proceedings Article
In: ME, pp. 168-183, 2011.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/ifip8-1/AsadiMGB11,
  title     = {Developing Families of Method-Oriented Architecture},
  author    = {Asadi, Mohsen and Mohabbati, Bardia and Gasevic, Dragan and Bagheri, Ebrahim},
  year      = {2011},
  date      = {2011-01-01},
  booktitle = {ME},
  pages     = {168--183},
  crossref  = {DBLP:conf/ifip8-1/2011me},
  abstract  = {The method engineering paradigm is motivated by the need for software development methods suitable for specific situations and requirements of organizations in general and projects in particular. Assembly-based method engineering, as one of the prominent approaches in method engineering, creates project-specific methods by (re-)using method components, specified with method processes and products, and stored in method repositories. This paper tries to address the two challenges of assembly-based method engineering related to more effective: i) publication and sharing of method components; and ii) management of variability in software methods, which have many commonalities. In order to address these two challenges, we propose the concept of Families of Method-Oriented Architectures. This concept is built on top of the principles of service-oriented architectures and software product lines.},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
The method engineering paradigm is motivated by the need for software development methods suitable for specific situations and requirements of organizations in general and projects in particular. Assembly-based method engineering, as one of the prominent approaches in method engineering, creates project-specific methods by (re-)using method components, specified with method processes and products, and stored in method repositories. This paper tries to address the two challenges of assembly-based method engineering related to more effective: i) publication and sharing of method components; and ii) management of variability in software methods, which have many commonalities. In order to address these two challenges, we propose the concept of Families of Method-Oriented Architectures. This concept is built on top of the principles of service-oriented architectures and software product lines.
Close
Cuzzola, John; Gasevic, Dragan; Bagheri, Ebrahim
Fault Detection through Sequential Filtering of Novelty Patterns Proceedings Article
In: ICMLA (1), pp. 217-222, 2011.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/icmla/CuzzolaGB11,
  title     = {Fault Detection through Sequential Filtering of Novelty Patterns},
  author    = {Cuzzola, John and Gasevic, Dragan and Bagheri, Ebrahim},
  year      = {2011},
  date      = {2011-01-01},
  booktitle = {ICMLA (1)},
  pages     = {217--222},
  crossref  = {DBLP:conf/icmla/2011-1},
  abstract  = {Multi-threaded applications are commonplace in today's software landscape. Pushing the boundaries of concurrency and parallelism, programmers are maximizing performance demanded by stakeholders. However, multi-threaded programs are challenging to test and debug. Prone to their own set of unique faults, such as race conditions, testers need to turn to automated validation tools for assistance. This paper's main contribution is a new algorithm called multi-stage novelty filtering (MSNF) that can aid in the discovery of software faults. MSNF stresses minimal configuration, no domain specific data preprocessing or software metrics. The MSNF approach is based on a multi-layered support vector machine scheme. After experimentation with the MSNF algorithm, we observed promising results in terms of precision. However, MSNF relies on multiple iterations (i.e., stages). Here, we propose four different strategies for estimating the number of the requested stages.},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Multi-threaded applications are commonplace in today's software landscape. Pushing the boundaries of concurrency and parallelism, programmers are maximizing performance demanded by stakeholders. However, multi-threaded programs are challenging to test and debug. Prone to their own set of unique faults, such as race conditions, testers need to turn to automated validation tools for assistance. This paper's main contribution is a new algorithm called multi-stage novelty filtering (MSNF) that can aid in the discovery of software faults. MSNF stresses minimal configuration, no domain specific data preprocessing or software metrics. The MSNF approach is based on a multi-layered support vector machine scheme. After experimentation with the MSNF algorithm, we observed promising results in terms of precision. However, MSNF relies on multiple iterations (i.e., stages). Here, we propose four different strategies for estimating the number of the requested stages.
Close
Noorian, Mahdi; Ensan, Alireza; Bagheri, Ebrahim; Boley, Harold; Biletskiy, Yevgen
Feature Model Debugging based on Description Logic Reasoning Proceedings Article
In: DMS, pp. 158-164, 2011.
BibTeX | Tags:
@inproceedings{DBLP:conf/dms/NoorianEBBB11,
  title     = {Feature Model Debugging based on Description Logic Reasoning},
  author    = {Noorian, Mahdi and Ensan, Alireza and Bagheri, Ebrahim and Boley, Harold and Biletskiy, Yevgen},
  year      = {2011},
  date      = {2011-01-01},
  booktitle = {DMS},
  pages     = {158--164},
  crossref  = {DBLP:conf/dms/2011},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Asadi, Mohsen; Bagheri, Ebrahim; Gasevic, Dragan; Hatala, Marek; Mohabbati, Bardia
Goal-driven software product line engineering Proceedings Article
In: SAC, pp. 691-698, 2011.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/sac/AsadiBGHM11,
  title     = {Goal-driven software product line engineering},
  author    = {Asadi, Mohsen and Bagheri, Ebrahim and Gasevic, Dragan and Hatala, Marek and Mohabbati, Bardia},
  year      = {2011},
  date      = {2011-01-01},
  booktitle = {SAC},
  pages     = {691--698},
  crossref  = {DBLP:conf/sac/2011},
  abstract  = {Feature Models encapsulate functionalities and quality properties of a product family. The employment of feature models for managing variability and commonality of large-scale product families raises an important question: on what basis should the features of a product family be selected for a target software application, which is going to be derived from the product family. Thus, the selection of the most suitable features for a specific application requires the understanding of its stakeholders' intentions and also the relationship between their intentions and the available software features. To address this important issue, we adopt a standard goal-oriented requirements engineering framework, i.e., the i* framework, for identifying stakeholders' intentions and propose an approach for explicitly mapping and bridging between the features of a product family and the goals and objectives of the stakeholders. We propose a novel approach to automatically pre-configure a given feature model based on the objectives of the target product stakeholders. Also, our approach is able to elucidate the rationale behind the selection of the most important features of a family for a target application.},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Feature Models encapsulate functionalities and quality properties of a product family. The employment of feature models for managing variability and commonality of large-scale product families raises an important question: on what basis should the features of a product family be selected for a target software application, which is going to be derived from the product family. Thus, the selection of the most suitable features for a specific application requires the understanding of its stakeholders' intentions and also the relationship between their intentions and the available software features. To address this important issue, we adopt a standard goal-oriented requirements engineering framework, i.e., the i* framework, for identifying stakeholders' intentions and propose an approach for explicitly mapping and bridging between the features of a product family and the goals and objectives of the stakeholders. We propose a novel approach to automatically pre-configure a given feature model based on the objectives of the target product stakeholders. Also, our approach is able to elucidate the rationale behind the selection of the most important features of a family for a target application.
Close
Ensan, Alireza; Bagheri, Ebrahim; Asadi, Mohsen; Gasevic, Dragan; Biletskiy, Yevgen
Goal-Oriented Test Case Selection and Prioritization for Product Line Feature Models Proceedings Article
In: ITNG, pp. 291-298, 2011.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/itng/EnsanBAGB11,
  title     = {Goal-Oriented Test Case Selection and Prioritization for Product Line Feature Models},
  author    = {Ensan, Alireza and Bagheri, Ebrahim and Asadi, Mohsen and Gasevic, Dragan and Biletskiy, Yevgen},
  year      = {2011},
  date      = {2011-01-01},
  booktitle = {ITNG},
  pages     = {291--298},
  crossref  = {DBLP:conf/itng/2011},
  abstract  = {The software product line engineering paradigm is amongst the widely used means for capturing and handling the commonalities and variabilities of the many applications of a target domain. The large number of possible products and complex interactions between software product line features makes the effective testing of them a challenge. To conquer the time and space complexity involved with testing a product line, an intuitive approach is the reduction of the test space. In this paper, we propose an approach to reduce the product line test space. We introduce a goal-oriented approach for the selection of the most desirable features from the product line. Such an approach allows us to identify the features that are more important and need to be tested more comprehensively from the perspective of the domain stakeholders. The more important features and the configurations that contain them will be given priority over the less important configurations, hence providing a hybrid test case reduction and prioritization strategy for testing software product lines.},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
The software product line engineering paradigm is amongst the widely used means for capturing and handling the commonalities and variabilities of the many applications of a target domain. The large number of possible products and complex interactions between software product line features makes the effective testing of them a challenge. To conquer the time and space complexity involved with testing a product line, an intuitive approach is the reduction of the test space. In this paper, we propose an approach to reduce the product line test space. We introduce a goal-oriented approach for the selection of the most desirable features from the product line. Such an approach allows us to identify the features that are more important and need to be tested more comprehensively from the perspective of the domain stakeholders. The more important features and the configurations that contain them will be given priority over the less important configurations, hence providing a hybrid test case reduction and prioritization strategy for testing software product lines.
Close
Noorian, Mahdi; Bagheri, Ebrahim; Du, Weichang
Machine Learning-based Software Testing: Towards a Classification Framework Proceedings Article
In: SEKE, pp. 225-229, 2011.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/seke/NoorianBD11,
  title     = {Machine Learning-based Software Testing: Towards a Classification Framework},
  author    = {Noorian, Mahdi and Bagheri, Ebrahim and Du, Weichang},
  year      = {2011},
  date      = {2011-01-01},
  booktitle = {SEKE},
  pages     = {225--229},
  crossref  = {DBLP:conf/seke/2011},
  abstract  = {Software Testing (ST) processes attempt to verify and validate the capability of a software system to meet its required attributes and functionality. As software systems become more complex, the need for automated software testing methods emerges. Machine Learning (ML) techniques have shown to be quite useful for this automation process. Various works have been presented in the junction of ML and ST areas. The lack of general guidelines for applying appropriate learning methods for software testing purposes is our major motivation in this current paper. In this paper, we introduce a classification framework which can help to systematically review research work in the ML and ST domains. The proposed framework dimensions are defined using major characteristics of existing software testing and machine learning methods. Our framework can be used to effectively construct a concrete set of guidelines for choosing the most appropriate learning method and applying it to a distinct stage of the software testing life-cycle for automation purposes.},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Software Testing (ST) processes attempt to verify and validate the capability of a software system to meet its required attributes and functionality. As software systems become more complex, the need for automated software testing methods emerges. Machine Learning (ML) techniques have shown to be quite useful for this automation process. Various works have been presented in the junction of ML and ST areas. The lack of general guidelines for applying appropriate learning methods for software testing purposes is our major motivation in this current paper. In this paper, we introduce a classification framework which can help to systematically review research work in the ML and ST domains. The proposed framework dimensions are defined using major characteristics of existing software testing and machine learning methods. Our framework can be used to effectively construct a concrete set of guidelines for choosing the most appropriate learning method and applying it to a distinct stage of the software testing life-cycle for automation purposes.
Close
2010
Bagheri, Ebrahim; Ghorbani, Ali A
A Model for the Integration of Prioritized Knowledge Bases Through Subjective Belief Games Journal Article
In: IEEE Transactions on Systems, Man, and Cybernetics, Part A, vol. 40, no. 6, pp. 1257-1270, 2010.
Abstract | BibTeX | Tags:
@article{DBLP:journals/tsmc/BagheriG10,
  title     = {A Model for the Integration of Prioritized Knowledge Bases Through Subjective Belief Games},
  author    = {Bagheri, Ebrahim and Ghorbani, Ali A},
  year      = {2010},
  date      = {2010-01-01},
  journal   = {IEEE Transactions on Systems, Man, and Cybernetics, Part A},
  volume    = {40},
  number    = {6},
  pages     = {1257--1270},
  abstract  = {Belief merging is concerned with the integration of several belief bases such that a coherent belief base is developed. Various belief merging models that use a belief negotiation game have been developed. These models often consist of two key functions, namely, negotiation and weakening. A negotiation function finds and selects the weakest belief bases among the available belief bases, while the weakening function removes the least valuable set of information from the selected belief base. This process is iteratively repeated until a consistent belief base is developed. In this paper, we extend the current game-based belief merging models by introducing the Subjective belief game model. The Subjective belief game model operates over a Subjective belief profile, which consists of belief bases with Subjectively annotated formulas. The Subjective information attached to each formula enables the proposed model to prioritize the formulas in the merging process. One of the advantages of the proposed game is that it provides room for enhancing the content of the weak belief bases, instead of enforcing their further weakening. Trustworthiness of the information sources is also considered. We provide several instantiations of the model. The Subjective belief game model can be useful for formalizing a negotiation process between the human participants of a design process in cases where discrepancies and conflicts arise. We apply this belief game model to an example case study of collaboratively designing some parts of unified modeling language (UML) class diagram for vehicle design.},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Belief merging is concerned with the integration of several belief bases such that a coherent belief base is developed. Various belief merging models that use a belief negotiation game have been developed. These models often consist of two key functions, namely, negotiation and weakening. A negotiation function finds and selects the weakest belief bases among the available belief bases, while the weakening function removes the least valuable set of information from the selected belief base. This process is iteratively repeated until a consistent belief base is developed. In this paper, we extend the current game-based belief merging models by introducing the Subjective belief game model. The Subjective belief game model operates over a Subjective belief profile, which consists of belief bases with Subjectively annotated formulas. The Subjective information attached to each formula enables the proposed model to prioritize the formulas in the merging process. One of the advantages of the proposed game is that it provides room for enhancing the content of the weak belief bases, instead of enforcing their further weakening. Trustworthiness of the information sources is also considered. We provide several instantiations of the model. The Subjective belief game model can be useful for formalizing a negotiation process between the human participants of a design process in cases where discrepancies and conflicts arise. We apply this belief game model to an example case study of collaboratively designing some parts of unified modeling language (UML) class diagram for vehicle design.
Close
Bagheri, Ebrahim; Ghorbani, Ali A
An exploratory classification of applications in the realm of collaborative modeling and design Journal Article
In: Inf. Syst. E-Business Management, vol. 8, no. 3, pp. 257-286, 2010.
Abstract | BibTeX | Tags:
@article{DBLP:journals/isem/BagheriG10,
  title     = {An exploratory classification of applications in the realm of collaborative modeling and design},
  author    = {Bagheri, Ebrahim and Ghorbani, Ali A},
  year      = {2010},
  date      = {2010-01-01},
  journal   = {Information Systems and e-Business Management},
  volume    = {8},
  number    = {3},
  pages     = {257--286},
  abstract  = {The high complexity and diversity of today's design projects demands the participation of multiple experts. The participating experts can influence the design process by sharing their perspective, expertise and resources. The involvement of various experts is often known as collaborative modeling and design. A collaborative modeling environment can encompass various geographical or organizational boundaries. In this paper, we provide a classification to study various aspects of this important issue through the exploration of the existing models, methods and applications in this area. The paper further addresses both model-oriented and artifact-neutral collaboration approaches and enumerates their features. The paper introduces the features of and classifies several relevant applications. The classification can serve as a guideline for customizing a suitable setting for a collaborative modeling process based on given requirements, needs and demands. Several suggestions for future work are also provided.},
  pubstate  = {published},
  tppubtype = {article}
}

Close
The high complexity and diversity of today's design projects demands the participation of multiple experts. The participating experts can influence the design process by sharing their perspective, expertise and resources. The involvement of various experts is often known as collaborative modeling and design. A collaborative modeling environment can encompass various geographical or organizational boundaries. In this paper, we provide a classification to study various aspects of this important issue through the exploration of the existing models, methods and applications in this area. The paper further addresses both model-oriented and artifact-neutral collaboration approaches and enumerates their features. The paper introduces the features of and classifies several relevant applications. The classification can serve as a guideline for customizing a suitable setting for a collaborative modeling process based on given requirements, needs and demands. Several suggestions for future work are also provided.
Close
Boskovic, Marko; Bagheri, Ebrahim; Gasevic, Dragan; Mohabbati, Bardia; Kaviani, Nima; Hatala, Marek
Automated Staged Configuration with Semantic Web Technologies Journal Article
In: International Journal of Software Engineering and Knowledge Engineering, vol. 20, no. 4, pp. 459-484, 2010.
Abstract | BibTeX | Tags:
@article{DBLP:journals/ijseke/BoskovicBGMKH10,
  title     = {Automated Staged Configuration with {Semantic Web} Technologies},
  author    = {Boskovic, Marko and Bagheri, Ebrahim and Gasevic, Dragan and Mohabbati, Bardia and Kaviani, Nima and Hatala, Marek},
  year      = {2010},
  date      = {2010-01-01},
  journal   = {International Journal of Software Engineering and Knowledge Engineering},
  volume    = {20},
  number    = {4},
  pages     = {459--484},
  abstract  = {Since the introduction in the early nineties, feature models receive a great deal of attention in industry and academia. Industrial success stories in applying feature models for modeling software product lines, and using them for configuring software-intensive systems motivate academia to discover ways to integrate different feature dependencies into the feature model, and automate verified feature configuration. In this paper we demonstrate how ontologies and Semantic Web technologies facilitate seamless integration of required external services and deployment platform capabilities into the feature model. Furthermore, we also contribute with an algorithm for automating staged configuration using Semantic Web reasoners to discover unfeasible features of the feature model.},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Since the introduction in the early nineties, feature models receive a great deal of attention in industry and academia. Industrial success stories in applying feature models for modeling software product lines, and using them for configuring software-intensive systems motivate academia to discover ways to integrate different feature dependencies into the feature model, and automate verified feature configuration. In this paper we demonstrate how ontologies and Semantic Web technologies facilitate seamless integration of required external services and deployment platform capabilities into the feature model. Furthermore, we also contribute with an algorithm for automating staged configuration using Semantic Web reasoners to discover unfeasible features of the feature model.
Close
Bagheri, Ebrahim; Ghorbani, Ali A
The analysis and management of non-canonical requirement specifications through a belief integration game Journal Article
In: Knowl. Inf. Syst., vol. 22, no. 1, pp. 27-64, 2010.
Abstract | BibTeX | Tags:
@article{DBLP:journals/kais/BagheriG10,
  title     = {The analysis and management of non-canonical requirement specifications through a belief integration game},
  author    = {Bagheri, Ebrahim and Ghorbani, Ali A},
  year      = {2010},
  date      = {2010-01-01},
  journal   = {Knowledge and Information Systems},
  volume    = {22},
  number    = {1},
  pages     = {27--64},
  abstract  = {Non-canonical requirement specifications refer to a set of software requirements that is either inconsistent, vague or incomplete. In this paper, we provide a correspondence between requirement specifications and annotated propositional belief bases. Through this analogy, we are able to analyze the contents of a given set of requirement collections known as viewpoints and specify whether they are incomplete, incoherent, or inconsistent under a closed-world reasoning assumption. Based on the requirement collections' properties introduced in this paper, we define a viewpoint integration game through which the inconsistencies of non-canonical requirement specifications are resolved. The game consists of several rounds of negotiation and is performed by two main functions, namely choice and enhancement functions. The outcome of this game is a set of inconsistency-free requirement collections that can be integrated to form a unique fair representative of the given requirement collections.},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Non-canonical requirement specifications refer to a set of software requirements that is either inconsistent, vague or incomplete. In this paper, we provide a correspondence between requirement specifications and annotated propositional belief bases. Through this analogy, we are able to analyze the contents of a given set of requirement collections known as viewpoints and specify whether they are incomplete, incoherent, or inconsistent under a closed-world reasoning assumption. Based on the requirement collections' properties introduced in this paper, we define a viewpoint integration game through which the inconsistencies of non-canonical requirement specifications are resolved. The game consists of several rounds of negotiation and is performed by two main functions, namely choice and enhancement functions. The outcome of this game is a set of inconsistency-free requirement collections that can be integrated to form a unique fair representative of the given requirement collections.
Close
Bagheri, Ebrahim; Ghorbani, Ali A
UML-CI: A reference model for profiling critical infrastructure systems Journal Article
In: Information Systems Frontiers, vol. 12, no. 2, pp. 115-139, 2010.
Abstract | BibTeX | Tags:
@article{DBLP:journals/isf/BagheriG10,
  title     = {{UML-CI}: A reference model for profiling critical infrastructure systems},
  author    = {Bagheri, Ebrahim and Ghorbani, Ali A},
  year      = {2010},
  date      = {2010-01-01},
  journal   = {Information Systems Frontiers},
  volume    = {12},
  number    = {2},
  pages     = {115--139},
  abstract  = {The study of critical infrastructure systems organization and behavior has drawn great attention in the recent years. This is in part due to their great influence on the ordinary life of every citizen. In this paper, we study critical infrastructures' characteristics and propose a reference model based on the Unified Modeling Language (UML). This reference model attempts to provide suitable means for the task of modeling an infrastructure system through offering five major metamodels. We introduce each of these metamodels and explain how it is possible to integrate them into a unique representation to characterize various aspects of an infrastructure system. Based on the metamodels of UML-CI, infrastructure system knowledge bases can be built to aid the process of infrastructure system modeling, profiling, and management.},
  pubstate  = {published},
  tppubtype = {article}
}

Close
The study of critical infrastructure systems organization and behavior has drawn great attention in the recent years. This is in part due to their great influence on the ordinary life of every citizen. In this paper, we study critical infrastructures' characteristics and propose a reference model based on the Unified Modeling Language (UML). This reference model attempts to provide suitable means for the task of modeling an infrastructure system through offering five major metamodels. We introduce each of these metamodels and explain how it is possible to integrate them into a unique representation to characterize various aspects of an infrastructure system. Based on the metamodels of UML-CI, infrastructure system knowledge bases can be built to aid the process of infrastructure system modeling, profiling, and management.
Close
Bagheri, Ebrahim; Ghorbani, Ali A
A Framework for the Manifestation of Tacit Critical Infrastructure Knowledge Book Section
In: Gopalakrishnan, Kasthurirangan; Peeta, Srinivas (Ed.): Sustainable and Resilient Critical Infrastructure Systems, pp. 139-158, 2010.
Abstract | BibTeX | Tags:
@incollection{DBLP:conf/GKPS/BagheriG10,
  author    = {Ebrahim Bagheri and Ali A Ghorbani},
  editor    = {Kasthurirangan Gopalakrishnan and Srinivas Peeta},
  title     = {A Framework for the Manifestation of Tacit Critical Infrastructure Knowledge},
  booktitle = {Sustainable and Resilient Critical Infrastructure Systems},
  pages     = {139-158},
  year      = {2010},
  date      = {2010-01-01},
  abstract  = {Critical infrastructure systems are tightly-coupled socio-technical systems with complicated behavior. They have emerged as an important focal point of research due to both their vital role in the normal conduct of societal activities as well as their inherent appealing complications for researchers. In this chapter, we will report on our experience in developing techniques, tools and algorithms for revealing and interpreting the hidden intricacies of such systems. The chapter will include the description of several of our technologies that allow for the guided understanding of the current status quo of infrastructure systems through the Astrolabe methodology, the formal profiling of infrastructure systems using the UML-CI meta-modeling mechanism, and also observing the emergent behavior of these complex systems through the application of the agent-based AIMS simulation suite.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {incollection}
}

Close
Critical infrastructure systems are tightly-coupled socio-technical systems with complicated behavior. They have emerged as an important focal point of research due to both their vital role in the normal conduct of societal activities as well as their inherent appealing complications for researchers. In this chapter, we will report on our experience in developing techniques, tools and algorithms for revealing and interpreting the hidden intricacies of such systems. The chapter will include the description of several of our technologies that allow for the guided understanding of the current status quo of infrastructure systems through the Astrolabe methodology, the formal profiling of infrastructure systems using the UML-CI meta-modeling mechanism, and also observing the emergent behavior of these complex systems through the application of the agent-based AIMS simulation suite.
Close
Ensan, Faezeh; Bagheri, Ebrahim
A framework for handling revisions in distributed ontologies Proceedings Article
In: SAC, pp. 1417-1422, 2010.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/sac/EnsanB10,
  author    = {Faezeh Ensan and Ebrahim Bagheri},
  title     = {A framework for handling revisions in distributed ontologies},
  booktitle = {SAC},
  pages     = {1417-1422},
  year      = {2010},
  date      = {2010-01-01},
  crossref  = {DBLP:conf/sac/2010},
  abstract  = {One of the important issues in ontology management is handling incoming updates and dealing with possible inconsistencies that they may induce. This is even more challenging in the context of a modular and distributed representation, because of the side-effects of the propagation of changes to the other connected or related ontologies. In this paper, we analyze the notion of ontology revision in a distributed ontology representation. We introduce a revision operator for distributed ontologies and show that it satisfies important postulates for knowledge base revision. In addition, based on a tableau algorithm for ALC ontologies, we propose an algorithm for applying the received changes and revising the original ontology through the proposed operator.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
One of the important issues in ontology management is handling incoming updates and dealing with possible inconsistencies that they may induce. This is even more challenging in the context of a modular and distributed representation, because of the side-effects of the propagation of changes to the other connected or related ontologies. In this paper, we analyze the notion of ontology revision in a distributed ontology representation. We introduce a revision operator for distributed ontologies and show that it satisfies important postulates for knowledge base revision. In addition, based on a tableau algorithm for ALC ontologies, we propose an algorithm for applying the received changes and revising the original ontology through the proposed operator.
Close
Ensan, Faezeh; Du, Weichang
A Metric Suite for Evaluating Cohesion and Coupling in Modular Ontologies Proceedings Article
In: WoMO, pp. 41-52, 2010.
BibTeX | Tags:
@inproceedings{DBLP:conf/womo/EnsanD10,
  author    = {Faezeh Ensan and Weichang Du},
  title     = {A Metric Suite for Evaluating Cohesion and Coupling in Modular Ontologies},
  booktitle = {WoMO},
  pages     = {41-52},
  year      = {2010},
  date      = {2010-01-01},
  crossref  = {DBLP:conf/womo/2010},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Boskovic, Marko; Mussbacher, Gunter; Bagheri, Ebrahim; Amyot, Daniel; Gasevic, Dragan; Hatala, Marek
Aspect-Oriented Feature Models Proceedings Article
In: MoDELS Workshops, pp. 110-124, 2010.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/models/BoskovicMBAGH10,
title = {Aspect-Oriented Feature Models},
author = {Marko Boskovic and Gunter Mussbacher and Ebrahim Bagheri and Daniel Amyot and Dragan Gasevic and Marek Hatala},
year = {2010},
date = {2010-01-01},
booktitle = {MoDELS Workshops},
pages = {110-124},
crossref = {DBLP:conf/models/2010w},
abstract = {Software Product Lines (SPLs) have emerged as a prominent approach for software reuse. SPLs are sets of software systems called families that are usually developed as a whole and share many common features. Feature models are most typically used as a means for capturing commonality and managing variability of the family. A particular product from the family is configured by selecting the desired features of that product. Typically, feature models are considered monolithic entities that do not support modularization well. As industrial feature models tend to be large, their modularization has become an important research topic lately. However, existing modularization approaches do not support modularization of crosscutting concerns. In this position paper, we introduce Aspect-oriented Feature Models (AoFM) and argue that using aspect-oriented techniques improves the manageability and reduces the maintainability effort of feature models. Particularly, we advocate an asymmetric approach that allows for the modularization of basic and crosscutting concerns in feature models.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Software Product Lines (SPLs) have emerged as a prominent approach for software reuse. SPLs are sets of software systems called families that are usually developed as a whole and share many common features. Feature models are most typically used as a means for capturing commonality and managing variability of the family. A particular product from the family is configured by selecting the desired features of that product. Typically, feature models are considered monolithic entities that do not support modularization well. As industrial feature models tend to be large, their modularization has become an important research topic lately. However, existing modularization approaches do not support modularization of crosscutting concerns. In this position paper, we introduce Aspect-oriented Feature Models (AoFM) and argue that using aspect-oriented techniques improves the manageability and reduces the maintainability effort of feature models. Particularly, we advocate an asymmetric approach that allows for the modularization of basic and crosscutting concerns in feature models.
Close
Tavallaee, Mahbod; Lu, Wei; Bagheri, Ebrahim; Ghorbani, Ali A
Automatic Discovery of Network Applications: A Hybrid Approach Proceedings Article
In: Canadian Conference on AI, pp. 208-219, 2010.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/ai/TavallaeeLBG10,
  author    = {Mahbod Tavallaee and Wei Lu and Ebrahim Bagheri and Ali A Ghorbani},
  title     = {Automatic Discovery of Network Applications: A Hybrid Approach},
  booktitle = {Canadian Conference on AI},
  pages     = {208-219},
  year      = {2010},
  date      = {2010-01-01},
  crossref  = {DBLP:conf/ai/2010},
  abstract  = {Automatic discovery of network applications is a very challenging task which has received a lot of attentions due to its importance in many areas such as network security, QoS provisioning, and network management. In this paper, we propose an online hybrid mechanism for the classification of network flows, in which we employ a signature-based classifier in the first level, and then using the weighted unigram model we improve the performance of the system by labeling the unknown portion. Our evaluation on two real networks shows between 5% and 9% performance improvement applying the genetic algorithm based scheme to find the appropriate weights for the unigram model.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Automatic discovery of network applications is a very challenging task which has received a lot of attentions due to its importance in many areas such as network security, QoS provisioning, and network management. In this paper, we propose an online hybrid mechanism for the classification of network flows, in which we employ a signature-based classifier in the first level, and then using the weighted unigram model we improve the performance of the system by labeling the unknown portion. Our evaluation on two real networks shows between 5% and 9% performance improvement applying the genetic algorithm based scheme to find the appropriate weights for the unigram model.
Close
Bagheri, Ebrahim; Noia, Tommaso Di; Ragone, Azzurra; Gasevic, Dragan
Configuring Software Product Line Feature Models Based on Stakeholders' Soft and Hard Requirements Proceedings Article
In: SPLC, pp. 16-31, 2010.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/splc/BagheriNRG10,
  author    = {Ebrahim Bagheri and Tommaso Di Noia and Azzurra Ragone and Dragan Gasevic},
  title     = {Configuring Software Product Line Feature Models Based on Stakeholders' Soft and Hard Requirements},
  booktitle = {SPLC},
  pages     = {16-31},
  year      = {2010},
  date      = {2010-01-01},
  crossref  = {DBLP:conf/splc/2010},
  abstract  = {Feature modeling is a technique for capturing commonality and variability. Feature models symbolize a representation of the possible application configuration space, and can be customized based on specific domain requirements and stakeholder goals. Most feature model configuration processes neglect the need to have a holistic approach towards the integration and satisfaction of the stakeholders' soft and hard constraints, and the application domain integrity constraints. In this paper, we will show how the structure and constraints of a feature model can be modeled uniformly through Propositional Logic extended with concrete domains, called P(N). Furthermore, we formalize the representation of soft constraints in fuzzy P(N) and explain how semi-automated feature model configuration is performed. The model configuration derivation process that we propose respects the soundness and completeness properties.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Feature modeling is a technique for capturing commonality and variability. Feature models symbolize a representation of the possible application configuration space, and can be customized based on specific domain requirements and stakeholder goals. Most feature model configuration processes neglect the need to have a holistic approach towards the integration and satisfaction of the stakeholders' soft and hard constraints, and the application domain integrity constraints. In this paper, we will show how the structure and constraints of a feature model can be modeled uniformly through Propositional Logic extended with concrete domains, called P(N). Furthermore, we formalize the representation of soft constraints in fuzzy P(N) and explain how semi-automated feature model configuration is performed. The model configuration derivation process that we propose respects the soundness and completeness properties.
Close
Bagheri, Ebrahim; Ensan, Faezeh
Evidential reasoning for the treatment of incoherent terminologies Proceedings Article
In: SAC, pp. 1381-1387, 2010.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/sac/BagheriE10,
  author    = {Ebrahim Bagheri and Faezeh Ensan},
  title     = {Evidential reasoning for the treatment of incoherent terminologies},
  booktitle = {SAC},
  pages     = {1381-1387},
  year      = {2010},
  date      = {2010-01-01},
  crossref  = {DBLP:conf/sac/2010},
  abstract  = {Many reasoning algorithms and techniques require consistent terminologies to be able to operate correctly and efficiently. However, many ontologies become inconsistent during their evolution and lifecycle. Many methods have been proposed to handle inconsistent terminologies including those that tolerate or repair inconsistencies. Most of these approaches focus on the syntactic properties of ontology terminologies and attempt to address inconsistency from that perspective and satisfy postulates such as the principle of minimal change. In this paper, we will employ evidential reasoning to take into account assertional statements of an ontology as observations and probable indications for the correctness and validity of one axiom over other competing axioms. We will show how ontology assertions are beneficial in ranking axioms to be used in Reiter's hitting set algorithm.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Many reasoning algorithms and techniques require consistent terminologies to be able to operate correctly and efficiently. However, many ontologies become inconsistent during their evolution and lifecycle. Many methods have been proposed to handle inconsistent terminologies including those that tolerate or repair inconsistencies. Most of these approaches focus on the syntactic properties of ontology terminologies and attempt to address inconsistency from that perspective and satisfy postulates such as the principle of minimal change. In this paper, we will employ evidential reasoning to take into account assertional statements of an ontology as observations and probable indications for the correctness and validity of one axiom over other competing axioms. We will show how ontology assertions are beneficial in ranking axioms to be used in Reiter's hitting set algorithm.
Close
Bagheri, Ebrahim; Asadi, Mohsen; Gasevic, Dragan; Soltani, Samaneh
Stratified Analytic Hierarchy Process: Prioritization and Selection of Software Features Proceedings Article
In: SPLC, pp. 300-315, 2010.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/splc/BagheriAGS10,
title = {Stratified Analytic Hierarchy Process: Prioritization and
Selection of Software Features},
author = {Ebrahim Bagheri and Mohsen Asadi and Dragan Gasevic and Samaneh Soltani},
year = {2010},
date = {2010-01-01},
booktitle = {SPLC},
pages = {300-315},
crossref = {DBLP:conf/splc/2010},
abstract = {Product line engineering allows for the rapid development of variants of a domain specific application by using a common set of reusable assets often known as core assets. Variability modeling is a critical issue in product line engineering, where the use of feature modeling is one of most commonly used formalisms. To support an effective and automated derivation of concrete products for a product family, staged configuration has been proposed in the research literature. In this paper, we propose the integration of well-known requirements engineering principles into stage configuration. Being inspired by the well-established Preview requirements engineering framework, we initially propose an extension of feature models with capabilities for capturing business oriented requirements. This representation enables a more effective capturing of stakeholders' preferences over the business requirements and objectives (e.g., implementation costs or security) in the form of fuzzy linguistic variables (e.g., high, medium, and low). On top of this extension, we propose a novel method, the Stratified Analytic Hierarchy process, which first helps to rank and select the most relevant high level business objectives for the target stakeholders (e.g., security over implementation costs), and then helps to rank and select the most relevant features from the feature model to be used as the starting point in the staged configuration process. Besides a complete formalization of the process, we define the place of our proposal in existing software product line lifecycles as well as demonstrate the use of our proposal on the widely-used e-Shop case study. Finally, we report on the results of our user study, which indicates a high appreciation of the proposed method by the participating industrial software developers. The tool support for S-AHP is also introduced.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
Product line engineering allows for the rapid development of variants of a domain specific application by using a common set of reusable assets often known as core assets. Variability modeling is a critical issue in product line engineering, where the use of feature modeling is one of most commonly used formalisms. To support an effective and automated derivation of concrete products for a product family, staged configuration has been proposed in the research literature. In this paper, we propose the integration of well-known requirements engineering principles into stage configuration. Being inspired by the well-established Preview requirements engineering framework, we initially propose an extension of feature models with capabilities for capturing business oriented requirements. This representation enables a more effective capturing of stakeholders' preferences over the business requirements and objectives (e.g., implementation costs or security) in the form of fuzzy linguistic variables (e.g., high, medium, and low). On top of this extension, we propose a novel method, the Stratified Analytic Hierarchy process, which first helps to rank and select the most relevant high level business objectives for the target stakeholders (e.g., security over implementation costs), and then helps to rank and select the most relevant features from the feature model to be used as the starting point in the staged configuration process. Besides a complete formalization of the process, we define the place of our proposal in existing software product line lifecycles as well as demonstrate the use of our proposal on the widely-used e-Shop case study. Finally, we report on the results of our user study, which indicates a high appreciation of the proposed method by the participating industrial software developers. The tool support for S-AHP is also introduced.
Close
2009
Bagheri, Ebrahim; Ghorbani, Ali A
A belief-theoretic framework for the collaborative development and integration of para-consistent conceptual models Journal Article
In: Journal of Systems and Software, vol. 82, no. 4, pp. 707-729, 2009.
Abstract | BibTeX | Tags:
@article{DBLP:journals/jss/BagheriG09,
  author    = {Ebrahim Bagheri and Ali A Ghorbani},
  title     = {A belief-theoretic framework for the collaborative development and integration of para-consistent conceptual models},
  journal   = {Journal of Systems and Software},
  volume    = {82},
  number    = {4},
  pages     = {707-729},
  year      = {2009},
  date      = {2009-01-01},
  abstract  = {Merging and integrating different conceptual models which have been collaboratively developed by domain experts and analysts with dissimilar perspectives on the same issue has been the subject of tremendous amount of research. In this paper, we focus on the fact that human analysts' opinions possess a degree of uncertainty which can be exploited while integrating such information. We propose an underlying modeling construct which is the basis for transforming conceptual models into a manipulatable format. Based on this construct, methods for formally negotiating over and merging of conceptual models are proposed. The approach presented in this paper focuses on the formalization of uncertainty and expert reliability through the employment of belief theory. The proposed work has been evaluated for its effectiveness and usability. The evaluators (a group of Computer Science graduate students) believed that the proposed framework has the capability to fulfil its intended tasks. The obtained results from the performance perspective are also promising.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Merging and integrating different conceptual models which have been collaboratively developed by domain experts and analysts with dissimilar perspectives on the same issue has been the subject of tremendous amount of research. In this paper, we focus on the fact that human analysts' opinions possess a degree of uncertainty which can be exploited while integrating such information. We propose an underlying modeling construct which is the basis for transforming conceptual models into a manipulatable format. Based on this construct, methods for formally negotiating over and merging of conceptual models are proposed. The approach presented in this paper focuses on the formalization of uncertainty and expert reliability through the employment of belief theory. The proposed work has been evaluated for its effectiveness and usability. The evaluators (a group of Computer Science graduate students) believed that the proposed framework has the capability to fulfil its intended tasks. The obtained results from the performance perspective are also promising.
Close
Bagheri, Ebrahim; Ghorbani, Ali A
Astrolabe: A Collaborative Multiperspective Goal-Oriented Risk Analysis Methodology Journal Article
In: IEEE Transactions on Systems, Man, and Cybernetics, Part A, vol. 39, no. 1, pp. 66-85, 2009.
Abstract | BibTeX | Tags:
@article{DBLP:journals/tsmc/BagheriG09,
  author    = {Ebrahim Bagheri and Ali A Ghorbani},
  title     = {Astrolabe: A Collaborative Multiperspective Goal-Oriented Risk Analysis Methodology},
  journal   = {IEEE Transactions on Systems, Man, and Cybernetics, Part A},
  volume    = {39},
  number    = {1},
  pages     = {66-85},
  year      = {2009},
  date      = {2009-01-01},
  abstract  = {The intention of this paper is to introduce a risk analysis methodology called Astrolabe. Astrolabe is based on causal analysis of systems risks. It allows the analysts to both align the current standpoint of the system with its intentions and identify any vulnerabilities or hazards that threaten the systems stability. Astrolabe adopts concepts from organizational theory and software requirement engineering. The aim of Astrolabe is to guide risk analysis through its phases so that a near complete investigation of system risks is performed. The concepts and methods driving the Astrolabe methodology have been clearly explained in this paper.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
The intention of this paper is to introduce a risk analysis methodology called Astrolabe. Astrolabe is based on causal analysis of systems risks. It allows the analysts to both align the current standpoint of the system with its intentions and identify any vulnerabilities or hazards that threaten the systems stability. Astrolabe adopts concepts from organizational theory and software requirement engineering. The aim of Astrolabe is to guide risk analysis through its phases so that a near complete investigation of system risks is performed. The concepts and methods driving the Astrolabe methodology have been clearly explained in this paper.
Close
Bagheri, Ebrahim; Zafarani, Reza; Barouni-Ebrahimi, M
Can reputation migrate? On the propagation of reputation in multi-context communities Journal Article
In: Knowl.-Based Syst., vol. 22, no. 6, pp. 410-420, 2009.
Abstract | BibTeX | Tags:
@article{DBLP:journals/kbs/BagheriZB09,
title = {Can reputation migrate? On the propagation of reputation
in multi-context communities},
author = {Ebrahim Bagheri and Reza Zafarani and M Barouni-Ebrahimi},
year = {2009},
date = {2009-01-01},
journal = {Knowl.-Based Syst.},
volume = {22},
number = {6},
pages = {410-420},
abstract = {As e-communities grow in both quality and quantity, their online users require more appropriate tools to suit their needs in such environments. Many such tools are not explicitly needed in real-world communities where humans directly interact with each other. Trust making and reputation ascription are among the most important examples of such tools. Humans often build trust relationships through interaction or recommendation, and are therefore able to ascribe relevant reputation to those they interact with. However, in online communities the process of trust making and reputation ascription is more complicated. In this paper, we address a special case of the trust making process where community users need to create bonds with those they have not encountered before. This is a common situation in websites such as amazon.com, ebay.com, epinions.com and many others. The model we propose is able to estimate the possible reputation of a given identity in any new context by observing his/her behavior in other communities. Our proposed model employs Dempster-Shafer based valuation networks to develop a global reputation structure and performs a belief propagation technique to infer contextual reputation values. The preliminary evaluation of the proposed model on a dataset collected from epinions.com shows promising results.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Close
As e-communities grow in both quality and quantity, their online users require more appropriate tools to suit their needs in such environments. Many such tools are not explicitly needed in real-world communities where humans directly interact with each other. Trust making and reputation ascription are among the most important examples of such tools. Humans often build trust relationships through interaction or recommendation, and are therefore able to ascribe relevant reputation to those they interact with. However, in online communities the process of trust making and reputation ascription is more complicated. In this paper, we address a special case of the trust making process where community users need to create bonds with those they have not encountered before. This is a common situation in websites such as amazon.com, ebay.com, epinions.com and many others. The model we propose is able to estimate the possible reputation of a given identity in any new context by observing his/her behavior in other communities. Our proposed model employs Dempster-Shafer based valuation networks to develop a global reputation structure and performs a belief propagation technique to infer contextual reputation values. The preliminary evaluation of the proposed model on a dataset collected from epinions.com shows promising results.
Close
Tavallaee, Mahbod; Bagheri, Ebrahim; Lu, Wei; Ghorbani, Ali A
A Detailed Analysis of the KDD CUP 99 Data Set Proceedings Article
In: IEEE Symposium on Computational Intelligence for Security and Defence Applications, pp. 1-6, 2009.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/CISDA/TavallaeeBLG09,
title = {A Detailed Analysis of the KDD CUP 99 Data Set},
author = {Mahbod Tavallaee and Ebrahim Bagheri and Wei Lu and Ali A Ghorbani},
year = {2009},
date = {2009-01-01},
booktitle = {IEEE Symposium on Computational Intelligence for Security and Defence Applications},
pages = {1-6},
abstract = {During the last decade, anomaly detection has attracted the attention of many researchers to overcome the weakness of signature-based IDSs in detecting novel attacks, and KDDCUP'99 is the mostly widely used data set for the evaluation of these systems. Having conducted an statistical analysis on this data set, we found two important issues which highly affects the performance of evaluated systems, and results in a very poor evaluation of anomaly detection approaches. To solve these issues, we have proposed a new data set, NSL-KDD, which consists of selected records of the complete KDD data set and does not suffer from any of mentioned shortcomings.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
During the last decade, anomaly detection has attracted the attention of many researchers to overcome the weakness of signature-based IDSs in detecting novel attacks, and KDDCUP'99 is the mostly widely used data set for the evaluation of these systems. Having conducted an statistical analysis on this data set, we found two important issues which highly affects the performance of evaluated systems, and results in a very poor evaluation of anomaly detection approaches. To solve these issues, we have proposed a new data set, NSL-KDD, which consists of selected records of the complete KDD data set and does not suffer from any of mentioned shortcomings.
Close
Jashki, Mohammad-Amin; Makki, Majid; Bagheri, Ebrahim; Ghorbani, Ali A
An Iterative Hybrid Filter-Wrapper Approach to Feature Selection for Document Clustering Proceedings Article
In: Canadian Conference on AI, pp. 74-85, 2009.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/ai/JashkiMBG09,
title = {An Iterative Hybrid Filter-Wrapper Approach to Feature Selection
for Document Clustering},
author = {Mohammad-Amin Jashki and Majid Makki and Ebrahim Bagheri and Ali A Ghorbani},
year = {2009},
date = {2009-01-01},
booktitle = {Canadian Conference on AI},
pages = {74-85},
abstract = {The manipulation of large-scale document data sets often involves the processing of a wealth of features that correspond with the available terms in the document space. The employment of all these features in the learning machine of interest is time consuming and at times reduces the performance of the learning machine. The feature space may consist of many redundant or non-discriminant features; therefore, feature selection techniques have been widely used. In this paper, we introduce a hybrid feature selection algorithm that selects features by applying both filter and wrapper methods in a hybrid manner, and iteratively selects the most competent set of features with an expectation maximization based algorithm. The proposed method employs a greedy algorithm for feature selection in each step. The method has been tested on various data sets whose results have been reported in this paper. The performance of the method both in terms of accuracy and Normalized Mutual Information is promising.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Close
The manipulation of large-scale document data sets often involves the processing of a wealth of features that correspond with the available terms in the document space. The employment of all these features in the learning machine of interest is time consuming and at times reduces the performance of the learning machine. The feature space may consist of many redundant or non-discriminant features; therefore, feature selection techniques have been widely used. In this paper, we introduce a hybrid feature selection algorithm that selects features by applying both filter and wrapper methods in a hybrid manner, and iteratively selects the most competent set of features with an expectation maximization based algorithm. The proposed method employs a greedy algorithm for feature selection in each step. The method has been tested on various data sets whose results have been reported in this paper. The performance of the method both in terms of accuracy and Normalized Mutual Information is promising.
Close
Ensan, Faezeh; Bagheri, Ebrahim; Du, Weichang
The Effect of Modification and Update Propagation on Modular Ontologies Proceedings Article
In: Second Canadian Semantic Web Symposium, 2009.
Abstract | BibTeX | Tags:
@inproceedings{DBLP:conf/CSWS/EnsanBD09,
  title     = {The Effect of Modification and Update Propagation on Modular Ontologies},
  author    = {Faezeh Ensan and Ebrahim Bagheri and Weichang Du},
  year      = {2009},
  date      = {2009-01-01},
  booktitle = {Second Canadian Semantic Web Symposium},
  abstract  = {One of the most important issues in the development of ontologies is dealing with revisions and updates, which becomes even more prominent in modular representations of ontologies, in light of the fact that an update in one ontology module may lead to unintended consequences in other ontology modules due to their coupling. In this paper, we provide a basis for updating modular ontologies and specially those that are represented by the interface-based formalism for modular ontologies. We illustrate different scenarios where modifications and propagation of updates in connected ontologies induce inconsistencies. An algorithm for handling these inconsistencies is provided that updates modular ontologies with TBox modifications.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
One of the most important issues in the development of ontologies is dealing with revisions and updates, which becomes even more prominent in modular representations of ontologies, in light of the fact that an update in one ontology module may lead to unintended consequences in other ontology modules due to their coupling. In this paper, we provide a basis for updating modular ontologies and specially those that are represented by the interface-based formalism for modular ontologies. We illustrate different scenarios where modifications and propagation of updates in connected ontologies induce inconsistencies. An algorithm for handling these inconsistencies is provided that updates modular ontologies with TBox modifications.
Close
2008
Ghorbani, Ali A; Bagheri, Ebrahim
The state of the art in critical infrastructure protection: a framework for convergence Journal Article
In: IJCIS, vol. 4, no. 3, pp. 215-244, 2008.
Abstract | BibTeX | Tags:
@article{DBLP:journals/ijcritis/GhorbaniB08,
  title     = {The state of the art in critical infrastructure protection: a framework for convergence},
  author    = {Ali A Ghorbani and Ebrahim Bagheri},
  year      = {2008},
  date      = {2008-01-01},
  journal   = {IJCIS},
  volume    = {4},
  number    = {3},
  pages     = {215--244},
  abstract  = {The protection of critical infrastructure systems has recently become a major concern for many countries. This is due to the effect of these systems on the daily lives of all citizens and the high possibility of disruption because of their complex structure and hidden interdependencies, which subsequently attract the attention of many researchers and scientists. The investigations of researchers have encompassed issues of national security, policymaking, infrastructure system organisation, and behaviour analysis and modelling. In this paper, we look into the latter subject and explore the attempts that have been made. Based on the available schemes and the requirements of this area, we propose a five-dimensional framework that introduces the major research necessities in this field. Among the various available schemes, we study ten of the most recently developed and/or influential systems. A comparison of these schemes based on the features of our proposed framework is made. The comparison allows us to conclude our examination with the identification of current research strengths and guidelines for future work.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
The protection of critical infrastructure systems has recently become a major concern for many countries. This is due to the effect of these systems on the daily lives of all citizens and the high possibility of disruption because of their complex structure and hidden interdependencies, which subsequently attract the attention of many researchers and scientists. The investigations of researchers have encompassed issues of national security, policymaking, infrastructure system organisation, and behaviour analysis and modelling. In this paper, we look into the latter subject and explore the attempts that have been made. Based on the available schemes and the requirements of this area, we propose a five-dimensional framework that introduces the major research necessities in this field. Among the various available schemes, we study ten of the most recently developed and/or influential systems. A comparison of these schemes based on the features of our proposed framework is made. The comparison allows us to conclude our examination with the identification of current research strengths and guidelines for future work.
Close
Bagheri, Ebrahim; Barouni-Ebrahimi, M; Zafarani, Reza; Ghorbani, Ali A
A Belief-Theoretic Reputation Estimation Model for Multi-context Communities Proceedings Article
In: Canadian Conference on AI, pp. 48-59, 2008.
BibTeX | Tags:
@inproceedings{DBLP:conf/ai/BagheriBZG08,
  title     = {A Belief-Theoretic Reputation Estimation Model for Multi-context Communities},
  author    = {Ebrahim Bagheri and M Barouni-Ebrahimi and Reza Zafarani and Ali A Ghorbani},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {Canadian Conference on AI},
  pages     = {48--59},
  crossref  = {DBLP:conf/ai/2008},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Barouni-Ebrahimi, M; Bagheri, Ebrahim; Ghorbani, Ali A
A Frequency Mining-Based Algorithm for Re-ranking Web Search Engine Retrievals Proceedings Article
In: Canadian Conference on AI, pp. 60-65, 2008.
BibTeX | Tags:
@inproceedings{DBLP:conf/ai/Barouni-EbrahimiBG08,
  title     = {A Frequency Mining-Based Algorithm for Re-ranking Web Search Engine Retrievals},
  author    = {M Barouni-Ebrahimi and Ebrahim Bagheri and Ali A Ghorbani},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {Canadian Conference on AI},
  pages     = {60--65},
  crossref  = {DBLP:conf/ai/2008},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Hubert, Laurent
A non-null annotation inferencer for Java bytecode Proceedings Article
In: PASTE, pp. 36-42, 2008.
BibTeX | Tags:
@inproceedings{DBLP:conf/paste/Hubert08,
  title     = {A non-null annotation inferencer for {Java} bytecode},
  author    = {Laurent Hubert},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {PASTE},
  pages     = {36--42},
  crossref  = {DBLP:conf/paste/2008},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Ensan, Faezeh
An Architecture and Formalism for Handling Modular Ontologies Proceedings Article
In: AAAI, pp. 1847-1848, 2008.
BibTeX | Tags:
@inproceedings{DBLP:conf/aaai/Ensan08,
  title     = {An Architecture and Formalism for Handling Modular Ontologies},
  author    = {Faezeh Ensan},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {AAAI},
  pages     = {1847--1848},
  crossref  = {DBLP:conf/aaai/2008},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Ensan, Faezeh; Du, Weichang
An Interface-Based Ontology Modularization Framework for Knowledge Encapsulation Proceedings Article
In: International Semantic Web Conference, pp. 517-532, 2008.
BibTeX | Tags:
@inproceedings{DBLP:conf/semweb/EnsanD08,
  title     = {An Interface-Based Ontology Modularization Framework for Knowledge Encapsulation},
  author    = {Faezeh Ensan and Weichang Du},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {International Semantic Web Conference},
  pages     = {517--532},
  crossref  = {DBLP:conf/semweb/2008},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Makki, Majid; Bagheri, Ebrahim; Ghorbani, Ali A
Automating Architecture Trade-Off Decision Making through a Complex Multi-attribute Decision Process Proceedings Article
In: ECSA, pp. 264-272, 2008.
BibTeX | Tags:
@inproceedings{DBLP:conf/ecsa/MakkiBG08,
  title     = {Automating Architecture Trade-Off Decision Making through a Complex Multi-attribute Decision Process},
  author    = {Majid Makki and Ebrahim Bagheri and Ali A Ghorbani},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {ECSA},
  pages     = {264--272},
  crossref  = {DBLP:conf/ecsa/2008},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Bagheri, Ebrahim; Ghorbani, Ali A
Experiences on the Belief-Theoretic Integration of Para-consistent Conceptual Models Proceedings Article
In: Australian Software Engineering Conference, pp. 357-366, 2008.
BibTeX | Tags:
@inproceedings{DBLP:conf/aswec/BagheriG08,
  title     = {Experiences on the Belief-Theoretic Integration of Para-consistent Conceptual Models},
  author    = {Ebrahim Bagheri and Ali A Ghorbani},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {Australian Software Engineering Conference},
  pages     = {357--366},
  crossref  = {DBLP:conf/aswec/2008},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Ensan, Faezeh
Formalizing Ontology Modularization through the Notion of Interfaces Proceedings Article
In: EKAW, pp. 74-82, 2008.
BibTeX | Tags:
@inproceedings{DBLP:conf/ekaw/Ensan08,
  title     = {Formalizing Ontology Modularization through the Notion of Interfaces},
  author    = {Faezeh Ensan},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {EKAW},
  pages     = {74--82},
  crossref  = {DBLP:conf/ekaw/2008},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Ensan, Faezeh; Du, Weichang
Formalizing the Role of Goals in the Development of Domain-Specific Ontological Frameworks Proceedings Article
In: HICSS, pp. 120, 2008.
BibTeX | Tags:
@inproceedings{DBLP:conf/hicss/EnsanD08,
  title     = {Formalizing the Role of Goals in the Development of Domain-Specific Ontological Frameworks},
  author    = {Faezeh Ensan and Weichang Du},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {HICSS},
  pages     = {120},
  crossref  = {DBLP:conf/hicss/2008},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Bagheri, Ebrahim; Ghorbani, Ali A
On the Definition of Essential and Contingent Properties of Subjective Belief Bases Proceedings Article
In: MICAI, pp. 686-698, 2008.
BibTeX | Tags:
@inproceedings{DBLP:conf/micai/BagheriG08,
  title     = {On the Definition of Essential and Contingent Properties of Subjective Belief Bases},
  author    = {Ebrahim Bagheri and Ali A Ghorbani},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {MICAI},
  pages     = {686--698},
  crossref  = {DBLP:conf/micai/2008},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Bagheri, Ebrahim; Ghorbani, Ali A
Towards a Belief-Theoretic Model for Collaborative Conceptual Model Development Proceedings Article
In: HICSS, pp. 489, 2008.
BibTeX | Tags:
@inproceedings{DBLP:conf/hicss/BagheriG08,
  title     = {Towards a Belief-Theoretic Model for Collaborative Conceptual Model Development},
  author    = {Ebrahim Bagheri and Ali A Ghorbani},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {HICSS},
  pages     = {489},
  crossref  = {DBLP:conf/hicss/2008},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
2007
Bagheri, Ebrahim; Naghibzadeh, Mahmoud
A New Approach to Resource Discovery and Dissemination for Pervasive Computing Environments Based on Mobile Agents Journal Article
In: Scientia Iranica Journal, vol. 14, no. 6, pp. 612 - 624, 2007.
Abstract | BibTeX | Tags:
@article{DBLP:journals/ScientiaIranica/BagheriN07,
  title     = {A New Approach to Resource Discovery and Dissemination for Pervasive Computing Environments Based on Mobile Agents},
  author    = {Ebrahim Bagheri and Mahmoud Naghibzadeh},
  year      = {2007},
  date      = {2007-01-01},
  journal   = {Scientia Iranica Journal},
  volume    = {14},
  number    = {6},
  pages     = {612--624},
  abstract  = {Pervasive computing as a new branch in the field of distributed computing has received wide contribution from different researchers. In this novel computing model, vast range of computational and communication resources along with other types of services are gathered under a single system image based on certain predefined criteria. Creating a transparent environment and providing the end-users with the illusion of local availability of multiple resources needs some kind of manager to coordinate the tasks and their required resources. The resource management system is mainly responsible for a balanced distribution of available resources among different tasks. Devising efficient resource discovery and dissemination algorithms is hence an important step towards preparing the bases for a resource centric management package. In this article we aim to provide two algorithms for this problem using mobile agents. The proposed resource discovery algorithms use two different hierarchical and flat approaches. The simulations show a good performance for both of the proposed models; however the hierarchical algorithm shows better results based on some of the introduced factors.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}

Close
Pervasive computing as a new branch in the field of distributed computing has received wide contribution from different researchers. In this novel computing model, vast range of computational and communication resources along with other types of services are gathered under a single system image based on certain predefined criteria. Creating a transparent environment and providing the end-users with the illusion of local availability of multiple resources needs some kind of manager to coordinate the tasks and their required resources. The resource management system is mainly responsible for a balanced distribution of available resources among different tasks. Devising efficient resource discovery and dissemination algorithms is hence an important step towards preparing the bases for a resource centric management package. In this article we aim to provide two algorithms for this problem using mobile agents. The proposed resource discovery algorithms use two different hierarchical and flat approaches. The simulations show a good performance for both of the proposed models; however the hierarchical algorithm shows better results based on some of the introduced factors.
Close
Bagheri, Ebrahim; Ghorbani, Ali A
A framework for distributed collaborative conceptual model development Proceedings Article
In: OOPSLA Companion, pp. 785-786, 2007.
BibTeX | Tags:
@inproceedings{DBLP:conf/oopsla/BagheriG07,
  title     = {A framework for distributed collaborative conceptual model development},
  author    = {Ebrahim Bagheri and Ali A Ghorbani},
  year      = {2007},
  date      = {2007-01-01},
  booktitle = {OOPSLA Companion},
  pages     = {785--786},
  crossref  = {DBLP:conf/oopsla/2007c},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Bagheri, Ebrahim
A proposal for taming uncertainty in the collaborative conceptual model development process Proceedings Article
In: OOPSLA Companion, pp. 941-942, 2007.
BibTeX | Tags:
@inproceedings{DBLP:conf/oopsla/Bagheri07,
  title     = {A proposal for taming uncertainty in the collaborative conceptual model development process},
  author    = {Ebrahim Bagheri},
  year      = {2007},
  date      = {2007-01-01},
  booktitle = {OOPSLA Companion},
  pages     = {941--942},
  crossref  = {DBLP:conf/oopsla/2007c},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Bagheri, Ebrahim; Ghorbani, Ali A
On the Collaborative Development of Para-Consistent Conceptual Models Proceedings Article
In: QSIC, pp. 336-341, 2007.
BibTeX | Tags:
@inproceedings{DBLP:conf/qsic/BagheriG07,
  title     = {On the Collaborative Development of Para-Consistent Conceptual Models},
  author    = {Ebrahim Bagheri and Ali A Ghorbani},
  year      = {2007},
  date      = {2007-01-01},
  booktitle = {QSIC},
  pages     = {336--341},
  crossref  = {DBLP:conf/qsic/2007},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Bagheri, Ebrahim; Ghorbani, Ali A
Risk Analysis in Critical Infrastructure Systems based on the Astrolabe Methodology Proceedings Article
In: CNSR, pp. 335-344, 2007.
BibTeX | Tags:
@inproceedings{DBLP:conf/cnsr/BagheriG07,
  title     = {Risk Analysis in Critical Infrastructure Systems based on the {Astrolabe} Methodology},
  author    = {Ebrahim Bagheri and Ali A Ghorbani},
  year      = {2007},
  date      = {2007-01-01},
  booktitle = {CNSR},
  pages     = {335--344},
  crossref  = {DBLP:conf/cnsr/2007},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Ensan, Faezeh; Bagheri, Ebrahim; Kahani, Mohsen
The Application of Users' Collective Experience for Crafting Suitable Search Engine Query Recommendations Proceedings Article
In: CNSR, pp. 148-156, 2007.
BibTeX | Tags:
@inproceedings{DBLP:conf/cnsr/EnsanBK07,
  title     = {The Application of Users' Collective Experience for Crafting Suitable Search Engine Query Recommendations},
  author    = {Faezeh Ensan and Ebrahim Bagheri and Mohsen Kahani},
  year      = {2007},
  date      = {2007-01-01},
  booktitle = {CNSR},
  pages     = {148--156},
  crossref  = {DBLP:conf/cnsr/2007},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Ensan, Faezeh; Du, Weichang
Towards Domain-Centric Ontology Development and Maintenance Frameworks Proceedings Article
In: SEKE, pp. 622-627, 2007.
BibTeX | Tags:
@inproceedings{DBLP:conf/seke/EnsanD07,
  title     = {Towards Domain-Centric Ontology Development and Maintenance Frameworks},
  author    = {Faezeh Ensan and Weichang Du},
  year      = {2007},
  date      = {2007-01-01},
  booktitle = {SEKE},
  pages     = {622--627},
  crossref  = {DBLP:conf/seke/2007},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
2006
Bagheri, Ebrahim; Ghorbani, Ali A
Behavior analysis through reputation propagation in a multi-context environment Proceedings Article
In: Proceedings of the 2006 International Conference on Privacy, Security and Trust: Bridge the Gap Between PST Technologies and Business Services, PST 2006, Markham, Ontario, Canada, October 30 - November 1, 2006, pp. 40, 2006.
Links | BibTeX | Tags:
@inproceedings{DBLP:conf/pst/BagheriG06,
  title     = {Behavior analysis through reputation propagation in a multi-context environment},
  author    = {Ebrahim Bagheri and Ali A Ghorbani},
  url       = {http://doi.acm.org/10.1145/1501434.1501482},
  doi       = {10.1145/1501434.1501482},
  year      = {2006},
  date      = {2006-01-01},
  booktitle = {Proceedings of the 2006 International Conference on Privacy, Security and Trust: Bridge the Gap Between PST Technologies and Business Services, PST 2006, Markham, Ontario, Canada, October 30 - November 1, 2006},
  pages     = {40},
  crossref  = {DBLP:conf/pst/2006},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
http://doi.acm.org/10.1145/1501434.1501482
doi:10.1145/1501434.1501482
Close
Bagheri, Ebrahim; Deldari, Hossein
Dejong Function Optimization by Means of a Parallel Approach to Fuzzified Genetic Algorithm Proceedings Article
In: Proceedings of the 11th IEEE Symposium on Computers and Communications (ISCC 2006), 26-29 June 2006, Cagliari, Sardinia, Italy, pp. 675–680, 2006.
Links | BibTeX | Tags:
@inproceedings{DBLP:conf/iscc/BagheriD06,
  title     = {Dejong Function Optimization by Means of a Parallel Approach to Fuzzified Genetic Algorithm},
  author    = {Ebrahim Bagheri and Hossein Deldari},
  url       = {http://dx.doi.org/10.1109/ISCC.2006.57},
  doi       = {10.1109/ISCC.2006.57},
  year      = {2006},
  date      = {2006-01-01},
  booktitle = {Proceedings of the 11th IEEE Symposium on Computers and Communications (ISCC 2006), 26-29 June 2006, Cagliari, Sardinia, Italy},
  pages     = {675--680},
  crossref  = {DBLP:conf/iscc/2006},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
http://dx.doi.org/10.1109/ISCC.2006.57
doi:10.1109/ISCC.2006.57
Close
Ensan, Faezeh; Yaghmaee, Mohammad Hossien; Bagheri, Ebrahim
FACT: A New Fuzzy Adaptive Clustering Technique Proceedings Article
In: ISCC, pp. 442-447, 2006.
BibTeX | Tags:
@inproceedings{DBLP:conf/iscc/EnsanYB06,
  title     = {{FACT}: A New Fuzzy Adaptive Clustering Technique},
  author    = {Faezeh Ensan and Mohammad Hossien Yaghmaee and Ebrahim Bagheri},
  year      = {2006},
  date      = {2006-01-01},
  booktitle = {ISCC},
  pages     = {442--447},
  crossref  = {DBLP:conf/iscc/2006},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
Bagheri, Ebrahim; Ghorbani, Ali A
Towards an MDA-oriented UML profile for critical infrastructure modeling Proceedings Article
In: Proceedings of the 2006 International Conference on Privacy, Security and Trust: Bridge the Gap Between PST Technologies and Business Services, PST 2006, Markham, Ontario, Canada, October 30 - November 1, 2006, pp. 66, 2006.
Links | BibTeX | Tags:
@inproceedings{DBLP:conf/pst/BagheriG06a,
  title     = {Towards an {MDA}-oriented {UML} profile for critical infrastructure modeling},
  author    = {Ebrahim Bagheri and Ali A Ghorbani},
  url       = {http://doi.acm.org/10.1145/1501434.1501512},
  doi       = {10.1145/1501434.1501512},
  year      = {2006},
  date      = {2006-01-01},
  booktitle = {Proceedings of the 2006 International Conference on Privacy, Security and Trust: Bridge the Gap Between PST Technologies and Business Services, PST 2006, Markham, Ontario, Canada, October 30 - November 1, 2006},
  pages     = {66},
  crossref  = {DBLP:conf/pst/2006},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
http://doi.acm.org/10.1145/1501434.1501512
doi:10.1145/1501434.1501512
Close
2005
Bagheri, Ebrahim; Naghibzadeh, Mahmood; Kahani, Mohsen
A Novel Resource Dissemination and Discovery Model for Pervasive Environments Using Mobile Agents Proceedings Article
In: High Performance Computing and Communications, First International Conference, HPCC 2005, Sorrento, Italy, September 21-23, 2005, Proceedings, pp. 1043–1048, 2005.
Links | BibTeX | Tags:
@inproceedings{DBLP:conf/hpcc/BagheriNK05,
  title     = {A Novel Resource Dissemination and Discovery Model for Pervasive Environments Using Mobile Agents},
  author    = {Ebrahim Bagheri and Mahmood Naghibzadeh and Mohsen Kahani},
  url       = {http://dx.doi.org/10.1007/11557654_115},
  doi       = {10.1007/11557654_115},
  year      = {2005},
  date      = {2005-01-01},
  booktitle = {High Performance Computing and Communications, First International Conference, HPCC 2005, Sorrento, Italy, September 21-23, 2005, Proceedings},
  pages     = {1043--1048},
  crossref  = {DBLP:conf/hpcc/2005},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}

Close
http://dx.doi.org/10.1007/11557654_115
doi:10.1007/11557654_115
Close
0000
Alizadeh, Havva; Zarrinkalam, Fattane; Fani, Hossein; Bagheri, Ebrahim
[No title] Journal Article Forthcoming
In: Information Processing and Management, Forthcoming.
Abstract | Links | BibTeX | Tags:
@article{IPM2024a,
  title     = {[No title]},
  author    = {Havva Alizadeh and Fattane Zarrinkalam and Hossein Fani and Ebrahim Bagheri},
  journal   = {Information Processing and Management},
  abstract  = {Predicting users’ interests on social networks is gaining attention due to its potential to
cater customized information and services to the end users. Although previous works have
extensively explored how users’ interests can be modeled on social networks, there has been
limited investigation into the prediction of users’ future interests. The objective of our work
in this paper is to empirically study the effectiveness of different sets of features based on
users’ past social interactions, historical interests and their temporal dynamics to predict their
interests over a collection of future-yet-unobserved topics. More specifically, we introduce and
formalize the features for interest prediction in four categories: user-based, topical, explicit
user-topic engagement, and friends’ influence. We further explore the influence of temporality
by augmenting features with information pertaining to users’ historical interests and social
connections. We model the task of future interest prediction as a learning-to-rank problem
where different features and their related categories are ranked based on their relevance and
performance in interest prediction, and investigate the efficiency of different features individually
and comparatively for predicting the future interest of users with different activity levels in
social networks over on unobserved topics. After conducting experiments on a real-world dataset
sourced from Twitter, we have identified several noteworthy findings: 1) relevance feature
in the category of past explicit user-topic engagement is the strongest indicator for predicting
user’s future interest across all user groups, with an observed 8.57% decrease in NDCG and an
8.95% decrease in MAP when it is removed in the ablation study. 2) the observation of an 8.06%
decrease in NDCG and a 7.3% decrease in MAP, when topical features such as popularity,
freshness, and coherence are removed in the ablation study, highlights their significance as
among the strongest indicators for users’ future interest, particularly for low-activity users. 3)
although temporal features show a clear positive impact across user groups with varying levels
of activity (resulting in a 4.5% decrease in NDCG and a 7.3% decrease in MAP when removed
in the ablation study), the temporal topical features do not demonstrate a significant positive
effect, and 4) The removal of user-specific characteristics such as influence and personality
traits in the ablation study reveals their significant impact in predicting future interest over
cold topics, reflected by a 5.49% decrease in NDCG and a 5.72% decrease in MAP. Our findings
make significant contributions to the field of future interest prediction, offering valuable insights
and practical implications for various applications in social network analysis.},
  keywords  = {},
  pubstate  = {forthcoming},
  tppubtype = {article}
}

Close
Predicting users’ interests on social networks is gaining attention due to its potential to
cater customized information and services to the end users. Although previous works have
extensively explored how users’ interests can be modeled on social networks, there has been
limited investigation into the prediction of users’ future interests. The objective of our work
in this paper is to empirically study the effectiveness of different sets of features based on
users’ past social interactions, historical interests and their temporal dynamics to predict their
interests over a collection of future-yet-unobserved topics. More specifically, we introduce and
formalize the features for interest prediction in four categories: user-based, topical, explicit
user-topic engagement, and friends’ influence. We further explore the influence of temporality
by augmenting features with information pertaining to users’ historical interests and social
connections. We model the task of future interest prediction as a learning-to-rank problem
where different features and their related categories are ranked based on their relevance and
performance in interest prediction, and investigate the efficiency of different features individually
and comparatively for predicting the future interest of users with different activity levels in
social networks over on unobserved topics. After conducting experiments on a real-world dataset
sourced from Twitter, we have identified several noteworthy findings: 1) relevance feature
in the category of past explicit user-topic engagement is the strongest indicator for predicting
user’s future interest across all user groups, with an observed 8.57% decrease in NDCG and an
8.95% decrease in MAP when it is removed in the ablation study. 2) the observation of an 8.06%
decrease in NDCG and a 7.3% decrease in MAP, when topical features such as popularity,
freshness, and coherence are removed in the ablation study, highlights their significance as
among the strongest indicators for users’ future interest, particularly for low-activity users. 3)
although temporal features show a clear positive impact across user groups with varying levels
of activity (resulting in a 4.5% decrease in NDCG and a 7.3% decrease in MAP when removed
in the ablation study), the temporal topical features do not demonstrate a significant positive
effect, and 4) The removal of user-specific characteristics such as influence and personality
traits in the ablation study reveals their significant impact in predicting future interest over
cold topics, reflected by a 5.49% decrease in NDCG and a 5.72% decrease in MAP. Our findings
make significant contributions to the field of future interest prediction, offering valuable insights
and practical implications for various applications in social network analysis.
Close
undefined
Close