function qa_db_search_posts_selectspec($voteuserid, $titlewords, $contentwords, $tagwords, $handlewords, $handle, $start, $full=false, $count=QA_DB_RETRIEVE_QS_AS)
/*
Return the selectspec to retrieve the $count top question matches, starting from the offset $start,
with the corresponding vote made by $voteuserid (if not null) and including $full content or not.
The search is performed for any of $titlewords in the title, $contentwords in the content (of the
question or an answer or comment for whom that is the antecedent question), $tagwords in tags, for
question author usernames which match a word in $handlewords or which match $handle as a whole.
The results also include a 'score' column based on the matching strength and post hotness,
and a 'matchparts' column that tells us where the score came from (since a question could get weight
from a match in the question itself, and/or weight from a match in its answers, comments, or
comments on answers). The 'matchparts' is a comma-separated list of tuples
matchtype:matchpostid:matchscore to be used with qa_search_max_match_anchor().
*/
{
// add LOG(postid)/1000000 here to ensure ordering is deterministic even if several posts have same score
// The score also gives a bonus for hot questions, where the bonus scales linearly with hotness. The hottest
// question gets a bonus equivalent to a matching unique tag, and the least hot question gets zero bonus.
$selectspec=qa_db_posts_basic_selectspec($voteuserid, $full);
$selectspec['columns'][]='score';
$selectspec['columns'][]='matchparts';
$selectspec['source'].=" JOIN (SELECT questionid, SUM(score)+2*(LOG(#)*(^posts.hotness-(SELECT MIN(hotness) FROM ^posts WHERE type='Q'))/((SELECT MAX(hotness) FROM ^posts WHERE type='Q')-(SELECT MIN(hotness) FROM ^posts WHERE type='Q')))+LOG(questionid)/1000000 AS score, GROUP_CONCAT(CONCAT_WS(':', matchposttype, matchpostid, ROUND(score,3))) AS matchparts FROM (";
$selectspec['sortdesc']='score';
array_push($selectspec['arguments'], QA_IGNORED_WORDS_FREQ);
$selectparts=0;
if (!empty($titlewords)) {
// At the indexing stage, duplicate words in title are ignored, so this doesn't count multiple appearances.
$selectspec['source'].=($selectparts++ ? " UNION ALL " : "").
"(SELECT postid AS questionid, LOG(#/titlecount) AS score, _utf8 'Q' AS matchposttype, postid AS matchpostid FROM ^titlewords JOIN ^words ON ^titlewords.wordid=^words.wordid WHERE word IN ($) AND titlecount<#)";
array_push($selectspec['arguments'], QA_IGNORED_WORDS_FREQ, $titlewords, QA_IGNORED_WORDS_FREQ);
}
if (!empty($contentwords)) {
// (1-1/(1+count)) weights words in content based on their frequency: If a word appears once in content
// it's equivalent to 1/2 an appearance in the title (ignoring the contentcount/titlecount factor).
// If it appears an infinite number of times, it's equivalent to one appearance in the title.
// This will discourage keyword stuffing while still giving some weight to multiple appearances.
// On top of that, answer matches are worth half a question match, and comment matches half again.
$selectspec['source'].=($selectparts++ ? " UNION ALL " : "").
"(SELECT questionid, (1-1/(1+count))*LOG(#/contentcount)*(CASE ^contentwords.type WHEN 'Q' THEN 1.0 WHEN 'A' THEN 0.5 ELSE 0.25 END) AS score, ^contentwords.type AS matchposttype, ^contentwords.postid AS matchpostid FROM ^contentwords JOIN ^words ON ^contentwords.wordid=^words.wordid WHERE word IN ($) AND contentcount<#)";
array_push($selectspec['arguments'], QA_IGNORED_WORDS_FREQ, $contentwords, QA_IGNORED_WORDS_FREQ);
}
if (!empty($tagwords)) {
// Appearances in the tag words count like 2 appearances in the title (ignoring the tagcount/titlecount factor).
// This is because tags express explicit semantic intent, whereas titles do not necessarily.
$selectspec['source'].=($selectparts++ ? " UNION ALL " : "").
"(SELECT postid AS questionid, 2*LOG(#/tagwordcount) AS score, _utf8 'Q' AS matchposttype, postid AS matchpostid FROM ^tagwords JOIN ^words ON ^tagwords.wordid=^words.wordid WHERE word IN ($) AND tagwordcount<#)";
array_push($selectspec['arguments'], QA_IGNORED_WORDS_FREQ, $tagwords, QA_IGNORED_WORDS_FREQ);
}
if (!empty($handlewords)) {
if (QA_FINAL_EXTERNAL_USERS) {
$userids=qa_get_userids_from_public($handlewords);
if (count($userids)) {
$selectspec['source'].=($selectparts++ ? " UNION ALL " : "").
"(SELECT postid AS questionid, LOG(#/qposts) AS score, _utf8 'Q' AS matchposttype, postid AS matchpostid FROM ^posts JOIN ^userpoints ON ^posts.userid=^userpoints.userid WHERE ^posts.userid IN ($) AND type='Q')";
array_push($selectspec['arguments'], QA_IGNORED_WORDS_FREQ, $userids);
}
} else {
$selectspec['source'].=($selectparts++ ? " UNION ALL " : "").
"(SELECT postid AS questionid, LOG(#/qposts) AS score, _utf8 'Q' AS matchposttype, postid AS matchpostid FROM ^posts JOIN ^users ON ^posts.userid=^users.userid JOIN ^userpoints ON ^userpoints.userid=^users.userid WHERE handle IN ($) AND type='Q')";
array_push($selectspec['arguments'], QA_IGNORED_WORDS_FREQ, $handlewords);
}
}
if (strlen($handle)) { // to allow searching for multi-word usernames (only works if search query contains full username and nothing else)
if (QA_FINAL_EXTERNAL_USERS) {
$userids=qa_get_userids_from_public(array($handle));
$userid=@$userids[$handle];
if (strlen($userid)) {
$selectspec['source'].=($selectparts++ ? " UNION ALL " : "").
"(SELECT postid AS questionid, LOG(#/qposts) AS score, _utf8 'Q' AS matchposttype, postid AS matchpostid FROM ^posts JOIN ^userpoints ON ^posts.userid=^userpoints.userid WHERE ^posts.userid=$ AND type='Q')";
array_push($selectspec['arguments'], QA_IGNORED_WORDS_FREQ, $userid);
}
} else {
$selectspec['source'].=($selectparts++ ? " UNION ALL " : "").
"(SELECT postid AS questionid, LOG(#/qposts) AS score, _utf8 'Q' AS matchposttype, postid AS matchpostid FROM ^posts JOIN ^users ON ^posts.userid=^users.userid JOIN ^userpoints ON ^userpoints.userid=^users.userid WHERE handle=$ AND type='Q')";
array_push($selectspec['arguments'], QA_IGNORED_WORDS_FREQ, $handle);
}
}
if ($selectparts==0)
$selectspec['source'].='(SELECT NULL as questionid, 0 AS score, NULL AS matchposttype, NULL AS matchpostid FROM ^posts WHERE postid=NULL)';
$selectspec['source'].=") x LEFT JOIN ^posts ON ^posts.postid=questionid GROUP BY questionid ORDER BY score DESC LIMIT #,#) y ON ^posts.postid=y.questionid";
array_push($selectspec['arguments'], $start, $count);
return $selectspec;
}