Differences between versions

Item | Version from 19:53, 9 April 2024 by Suffusion of Yellow | Version from 21:15, 9 April 2024 by Suffusion of Yellow
Basic information
Notes:
Same word list as 1296. Words with too many legitimate uses to disallow outright. Instead, there must be some other clues of vandalism.
Same word list as 1296. Words with too many legitimate uses to disallow outright. Instead, there must be some other clues of vandalism.


Of course, this will take a huge amount of refinement. Therefore ALWAYS keep the word list in sync with 1296; otherwise it will be impossible to check if any change will cause FPs! --Suffusion of Yellow 23:16 4 Apr 2024
Of course, this will take a huge amount of refinement. Therefore ALWAYS keep the word list in sync with 1296; otherwise it will be impossible to check if any change will cause FPs! --Suffusion of Yellow 23:16 4 Apr 2024


No harm in tagging for now; already down a few percent false positives. --Suffusion of Yellow 19:51 9 Apr 2024
No harm in tagging for now; already down a few percent false positives. --Suffusion of Yellow 19:51 9 Apr 2024
Use length of summary, instead of all-or-nothing. Also catch text added to beginning of single line. --Suffusion of Yellow 21:14 9 Apr 2024
Filter conditions
Conditions:
(documentation)
sus := "(?x)\b(?:
sus := "(?x)\b(?:
     #Common words
     #Common words
     amazing
     amazing
     |anal
     |anal
     |ass+
     |ass+
     |balls
     |balls
     |big[ ]black
     |big[ ]black
     |boobs
     |boobs
     |booty
     |booty
     |bum
     |bum
     |butt
     |butt
     |caca
     |caca
     |cheeks
     |cheeks
     |cool(?:est)?
     |cool(?:est)?
     |creeps?
     |creeps?
     |cum
     |cum
     |daddy
     |daddy
     |fart(?:ed|ing|s)?
     |fart(?:ed|ing|s)?
     |fat
     |fat
     |gay(?:est|s)?
     |gay(?:est|s)?
     |haha
     |haha
     |hehe
     |hehe
     |hello
     |hello
     |hola
     |hola
     |(?<!\S)hi(?!\S)
     |(?<!\S)hi(?!\S)
     |i[ ](?:like|love|hate)
     |i[ ](?:like|love|hate)
     |idk
     |idk
     |is[ ]fake
     |is[ ]fake
     |is[ ]the[ ](?:best|worst)
     |is[ ]the[ ](?:best|worst)
     |m[ou]m(?:my)?
     |m[ou]m(?:my)?
     |morons?
     |morons?
     |nonces?
     |nonces?
     |oh[ ]no
     |oh[ ]no
     |omg
     |omg
     |poo+
     |poo+
     |porno?
     |porno?
     |puta
     |puta
     |racists?
     |racists?
     |retards?
     |retards?
     |sexy
     |sexy
     |scumbags?
     |scumbags?
     |smell[ys]  
     |smell[ys]  
     |stink[ys]
     |stink[ys]
     |stupid
     |stupid
     |suck(?:ed|ing|s)?
     |suck(?:ed|ing|s)?
     |tits
     |tits
     |toes
     |toes
     |vaginas?
     |vaginas?
     |yummy
     |yummy
     #Button pushing
     #Button pushing
     |bold[ ]text
     |bold[ ]text
     |italic[ ]text
     |italic[ ]text
     |ref></ref
     |ref></ref
      
      
     #Memes
     #Memes
     |among[ ]us
     |among[ ]us
     |fortnite
     |fortnite
     |ronaldo
     |ronaldo
     |sigmas?
     |sigmas?
     |the[ ]goat
     |the[ ]goat
)\b";
)\b";




page_namespace == 0 &
page_namespace == 0 &
!("confirmed" in user_groups) &
!("confirmed" in user_groups) &
edit_delta < 1000 &
edit_delta < 1000 &
(
(
     match := get_matches("(?i)" + sus, added_lines)[0];
     match := get_matches("(?i)" + sus, added_lines)[0];
     match & (
     match & (
         escaped_match := "(?:\b" + rescape(match) + "\b)";
         escaped_match := "(?:\b" + rescape(match) + "\b)";
         !(removed_lines irlike sus) &
         !(removed_lines irlike sus) &
         !((old_wikitext + added_links) irlike escaped_match)
         !((old_wikitext + added_links) irlike escaped_match)
     )
     )
) & (
) & (
     /* Baseline AGF */
     /* Baseline AGF */
     score := 1.0;
     score := 0.5;


     /* More AGF on "sweary" pages */
     /* More AGF on "sweary" pages */
     score := score + 0.25 * rcount("(?i)" + sus, old_wikitext);
     score := score + 0.25 * rcount("(?i)" + sus, old_wikitext);


     /* More AGF on fiction or music related pages */
     /* More AGF on fiction or music related pages */
     score := score + 2.0 * (new_wikitext irlike "(?x)
     score := score + 2.0 * (new_wikitext irlike "(?x)
         category:.*(?:films|shows|books|episodes|bands|musical[ ]groups|albums|songs)
         category:.*(?:films|shows|books|episodes|bands|musical[ ]groups|albums|songs)
         |discography
         |discography
         |filmography
         |filmography
     ");  
     ");  
      
      
     /* Added references */
     /* Added references */
     ref_cnt := (count("<ref", added_lines) - count("<ref", removed_lines));
     ref_cnt := (count("<ref", added_lines) - count("<ref", removed_lines));
     clamped_ref_cnt := ref_cnt < -1 ? -1 : ref_cnt;
     clamped_ref_cnt := ref_cnt < -1 ? -1 : ref_cnt;
     score := score + 2.0 * clamped_ref_cnt;
     score := score + 2.0 * clamped_ref_cnt;


     /* Added markup */
     /* Added markup */
     markup_cnt := (rcount("[[\]{}|*#=]", added_lines)) - (rcount("[[\]{}|*#=]", removed_lines));
     markup_cnt := (rcount("[[\]{}|*#=]", added_lines)) - (rcount("[[\]{}|*#=]", removed_lines));
     clamped_markup_cnt := markup_cnt < -10 ? -10 : markup_cnt;
     clamped_markup_cnt := markup_cnt < -10 ? -10 : markup_cnt;
     score := score + 0.1 * clamped_markup_cnt;
     score := score + 0.1 * clamped_markup_cnt;


     /* Added quotes or italics */
     /* Added quotes or italics */
     quote_cnt := (rcount("(?<!')''(?!')|\"", added_lines)) - rcount("(?<!')''(?!')|\"", removed_lines);
     quote_cnt := (rcount("(?<!')''(?!')|\"", added_lines)) - rcount("(?<!')''(?!')|\"", removed_lines);
     clamped_quote_cnt := quote_cnt < 0 ? 0 : quote_cnt;
     clamped_quote_cnt := quote_cnt < 0 ? 0 : quote_cnt;
     score := score + 0.5 * clamped_quote_cnt;
     score := score + 0.5 * clamped_quote_cnt;


     /* Unenclopedic language */
     /* Unencyclopedic language */
     bonus_words := "\b(?:i|me|my|your?)\b";
     bonus_words := "\b(?:i|me|my|your?)\b";
     score := score - 0.5 * (
     score := score - 0.5 * (
         added_lines irlike bonus_words &  
         added_lines irlike bonus_words &  
         !(removed_lines irlike bonus_words) &
         !(removed_lines irlike bonus_words) &
         !(match irlike bonus_words) /* Avoid double-counting "i like", etc. */
         !(match irlike bonus_words) /* Avoid double-counting "i like", etc. */
     );
     );


     /* No summary */
     /* Summary length */
     score := score - 0.5 * (summary irlike "^(?:/\*.*?\*/)?\s*$");
     sum := get_matches("^(?:/\*.*?\*/)?\s*(.*)$", summary)[1];
    score := score + 0.01 * length(sum);;


     /* Did they add these words and do nothing else except adjust whitespace and punctuation? */
     /* Did they add these words and do nothing else except adjust whitespace and punctuation? */
     score := score - 2.0 * (norm(str_replace_regexp(added_lines, sus, "")) == norm(removed_lines));
     score := score - 2.0 * (norm(str_replace_regexp(added_lines, sus, "")) == norm(removed_lines));


     /* Multiple bad words */
     /* Multiple bad words */
     extra_cnt := rcount("(?i)" + sus, added_lines) - 1;
     extra_cnt := rcount("(?i)" + sus, added_lines) - 1;
     clamped_extra_cnt := extra_cnt > 4 ? 4 : extra_cnt;
     clamped_extra_cnt := extra_cnt > 4 ? 4 : extra_cnt;
     score := score - 0.5 * clamped_extra_cnt;
     score := score - 0.5 * clamped_extra_cnt;
      
      
     /* Back-to-back bad words */
     /* Back-to-back bad words */
     score := score - 2.0 * (added_lines irlike ("(?:(?:" + sus + ")\W*){2}"));
     score := score - 2.0 * (added_lines irlike ("(?:(?:" + sus + ")\W*){2}"));


     /* If the word count is exactly the same, that probably means they just swapped out one word, or did a search-and-replace */
     /* If the word count is exactly the same, that probably means they just swapped out one word, or did a search-and-replace */
     score := score - 1.0 * (rcount("\w+", added_lines) == rcount("\w+", removed_lines));
     score := score - 1.0 * (rcount("\w+", added_lines) == rcount("\w+", removed_lines));


     /* Anything in all caps, not necessarily these words */
     /* Anything in all caps, not necessarily these words */
     score := score - 1.0 * (rcount("[A-Z]{4,}", added_lines) > rcount("[A-Z]{4,}", removed_lines));
     score := score - 1.0 * (rcount("[A-Z]{4,}", added_lines) > rcount("[A-Z]{4,}", removed_lines));


     if (!quote_cnt & !ref_cnt & !markup_cnt) then (
     if (!quote_cnt & !ref_cnt & !markup_cnt) then (
         /* Added plain text to the end of a single line */
         /* Added plain text to the end of a single line */
         score := score - 1.0 * (
         score := score - 1.0 * (
                 strpos(rmwhitespace(added_lines), rmwhitespace(removed_lines)) == 0 &
                 strpos(rmwhitespace(added_lines), rmwhitespace(removed_lines)) != -1 &
                 length(added_lines) == length(removed_lines)
                 length(added_lines) == 1 &
                length(removed_lines) == 1
         );
         );


         /* Added plain text to the start or end of the page */
         /* Added plain text to the start or end of the page */
         score := score - 2.0 * (strpos(rmwhitespace(new_wikitext), rmwhitespace(old_wikitext)) != -1);
         score := score - 2.0 * (strpos(rmwhitespace(new_wikitext), rmwhitespace(old_wikitext)) != -1);
       
        /* No changes at all to punctuation or markup */
        score := score - 2.0 * (str_replace_regexp(added_lines, "[\w\s]", "") == str_replace_regexp(removed_lines, "[\w\s]", ""));
     ) end;
     ) end;
   
    /* No changes at all to punctuation or markup */
    score := score - 2.0 * (str_replace_regexp(added_lines, "[\w\s]", "") == str_replace_regexp(removed_lines, "[\w\s]", ""));
      
      
     /* Not much use of the shift key */
     /* Not much use of the shift key */
     score := score - 1.0 * (rcount('[A-Z]', added_lines) == rcount('[A-Z]', removed_lines));
     score := score - 1.0 * (rcount('[A-Z]', added_lines) == rcount('[A-Z]', removed_lines));
      
      
     /* Red link containing the specific word they added */
     /* Red link containing the specific word they added */
     score := score - 2.0 * (new_html irlike ('class="new" title="[^"]*' + escaped_match));
     score := score - 2.0 * (new_html irlike ('class="new" title="[^"]*' + escaped_match));


     score < 0
     score < 0
)
)

Differences between versions

Item | Version from 19:53, 9 April 2024 by Suffusion of Yellow | Version from 21:15, 9 April 2024 by Suffusion of Yellow
Basic information
Notes:
Same word list as 1296. Words with too many legitimate uses to disallow outright. Instead, there must be some other clues of vandalism.
Same word list as 1296. Words with too many legitimate uses to disallow outright. Instead, there must be some other clues of vandalism.


Of course, this will take a huge amount of refinement. Therefore ALWAYS keep the word list in sync with 1296; otherwise it will be impossible to check if any change will cause FPs! --Suffusion of Yellow 23:16 4 Apr 2024
Of course, this will take a huge amount of refinement. Therefore ALWAYS keep the word list in sync with 1296; otherwise it will be impossible to check if any change will cause FPs! --Suffusion of Yellow 23:16 4 Apr 2024


No harm in tagging for now; already down a few percent false positives. --Suffusion of Yellow 19:51 9 Apr 2024
No harm in tagging for now; already down a few percent false positives. --Suffusion of Yellow 19:51 9 Apr 2024
Use length of summary, instead of all-or-nothing. Also catch text added to beginning of single line. --Suffusion of Yellow 21:14 9 Apr 2024
Filter conditions
Conditions:
(documentation)
sus := "(?x)\b(?:
sus := "(?x)\b(?:
     #Common words
     #Common words
     amazing
     amazing
     |anal
     |anal
     |ass+
     |ass+
     |balls
     |balls
     |big[ ]black
     |big[ ]black
     |boobs
     |boobs
     |booty
     |booty
     |bum
     |bum
     |butt
     |butt
     |caca
     |caca
     |cheeks
     |cheeks
     |cool(?:est)?
     |cool(?:est)?
     |creeps?
     |creeps?
     |cum
     |cum
     |daddy
     |daddy
     |fart(?:ed|ing|s)?
     |fart(?:ed|ing|s)?
     |fat
     |fat
     |gay(?:est|s)?
     |gay(?:est|s)?
     |haha
     |haha
     |hehe
     |hehe
     |hello
     |hello
     |hola
     |hola
     |(?<!\S)hi(?!\S)
     |(?<!\S)hi(?!\S)
     |i[ ](?:like|love|hate)
     |i[ ](?:like|love|hate)
     |idk
     |idk
     |is[ ]fake
     |is[ ]fake
     |is[ ]the[ ](?:best|worst)
     |is[ ]the[ ](?:best|worst)
     |m[ou]m(?:my)?
     |m[ou]m(?:my)?
     |morons?
     |morons?
     |nonces?
     |nonces?
     |oh[ ]no
     |oh[ ]no
     |omg
     |omg
     |poo+
     |poo+
     |porno?
     |porno?
     |puta
     |puta
     |racists?
     |racists?
     |retards?
     |retards?
     |sexy
     |sexy
     |scumbags?
     |scumbags?
     |smell[ys]  
     |smell[ys]  
     |stink[ys]
     |stink[ys]
     |stupid
     |stupid
     |suck(?:ed|ing|s)?
     |suck(?:ed|ing|s)?
     |tits
     |tits
     |toes
     |toes
     |vaginas?
     |vaginas?
     |yummy
     |yummy
     #Button pushing
     #Button pushing
     |bold[ ]text
     |bold[ ]text
     |italic[ ]text
     |italic[ ]text
     |ref></ref
     |ref></ref
      
      
     #Memes
     #Memes
     |among[ ]us
     |among[ ]us
     |fortnite
     |fortnite
     |ronaldo
     |ronaldo
     |sigmas?
     |sigmas?
     |the[ ]goat
     |the[ ]goat
)\b";
)\b";




page_namespace == 0 &
page_namespace == 0 &
!("confirmed" in user_groups) &
!("confirmed" in user_groups) &
edit_delta < 1000 &
edit_delta < 1000 &
(
(
     match := get_matches("(?i)" + sus, added_lines)[0];
     match := get_matches("(?i)" + sus, added_lines)[0];
     match & (
     match & (
         escaped_match := "(?:\b" + rescape(match) + "\b)";
         escaped_match := "(?:\b" + rescape(match) + "\b)";
         !(removed_lines irlike sus) &
         !(removed_lines irlike sus) &
         !((old_wikitext + added_links) irlike escaped_match)
         !((old_wikitext + added_links) irlike escaped_match)
     )
     )
) & (
) & (
     /* Baseline AGF */
     /* Baseline AGF */
     score := 1.0;
     score := 0.5;


     /* More AGF on "sweary" pages */
     /* More AGF on "sweary" pages */
     score := score + 0.25 * rcount("(?i)" + sus, old_wikitext);
     score := score + 0.25 * rcount("(?i)" + sus, old_wikitext);


     /* More AGF on fiction or music related pages */
     /* More AGF on fiction or music related pages */
     score := score + 2.0 * (new_wikitext irlike "(?x)
     score := score + 2.0 * (new_wikitext irlike "(?x)
         category:.*(?:films|shows|books|episodes|bands|musical[ ]groups|albums|songs)
         category:.*(?:films|shows|books|episodes|bands|musical[ ]groups|albums|songs)
         |discography
         |discography
         |filmography
         |filmography
     ");  
     ");  
      
      
     /* Added references */
     /* Added references */
     ref_cnt := (count("<ref", added_lines) - count("<ref", removed_lines));
     ref_cnt := (count("<ref", added_lines) - count("<ref", removed_lines));
     clamped_ref_cnt := ref_cnt < -1 ? -1 : ref_cnt;
     clamped_ref_cnt := ref_cnt < -1 ? -1 : ref_cnt;
     score := score + 2.0 * clamped_ref_cnt;
     score := score + 2.0 * clamped_ref_cnt;


     /* Added markup */
     /* Added markup */
     markup_cnt := (rcount("[[\]{}|*#=]", added_lines)) - (rcount("[[\]{}|*#=]", removed_lines));
     markup_cnt := (rcount("[[\]{}|*#=]", added_lines)) - (rcount("[[\]{}|*#=]", removed_lines));
     clamped_markup_cnt := markup_cnt < -10 ? -10 : markup_cnt;
     clamped_markup_cnt := markup_cnt < -10 ? -10 : markup_cnt;
     score := score + 0.1 * clamped_markup_cnt;
     score := score + 0.1 * clamped_markup_cnt;


     /* Added quotes or italics */
     /* Added quotes or italics */
     quote_cnt := (rcount("(?<!')''(?!')|\"", added_lines)) - rcount("(?<!')''(?!')|\"", removed_lines);
     quote_cnt := (rcount("(?<!')''(?!')|\"", added_lines)) - rcount("(?<!')''(?!')|\"", removed_lines);
     clamped_quote_cnt := quote_cnt < 0 ? 0 : quote_cnt;
     clamped_quote_cnt := quote_cnt < 0 ? 0 : quote_cnt;
     score := score + 0.5 * clamped_quote_cnt;
     score := score + 0.5 * clamped_quote_cnt;


     /* Unenclopedic language */
     /* Unencyclopedic language */
     bonus_words := "\b(?:i|me|my|your?)\b";
     bonus_words := "\b(?:i|me|my|your?)\b";
     score := score - 0.5 * (
     score := score - 0.5 * (
         added_lines irlike bonus_words &  
         added_lines irlike bonus_words &  
         !(removed_lines irlike bonus_words) &
         !(removed_lines irlike bonus_words) &
         !(match irlike bonus_words) /* Avoid double-counting "i like", etc. */
         !(match irlike bonus_words) /* Avoid double-counting "i like", etc. */
     );
     );


     /* No summary */
     /* Summary length */
     score := score - 0.5 * (summary irlike "^(?:/\*.*?\*/)?\s*$");
     sum := get_matches("^(?:/\*.*?\*/)?\s*(.*)$", summary)[1];
    score := score + 0.01 * length(sum);;


     /* Did they add these words and do nothing else except adjust whitespace and punctuation? */
     /* Did they add these words and do nothing else except adjust whitespace and punctuation? */
     score := score - 2.0 * (norm(str_replace_regexp(added_lines, sus, "")) == norm(removed_lines));
     score := score - 2.0 * (norm(str_replace_regexp(added_lines, sus, "")) == norm(removed_lines));


     /* Multiple bad words */
     /* Multiple bad words */
     extra_cnt := rcount("(?i)" + sus, added_lines) - 1;
     extra_cnt := rcount("(?i)" + sus, added_lines) - 1;
     clamped_extra_cnt := extra_cnt > 4 ? 4 : extra_cnt;
     clamped_extra_cnt := extra_cnt > 4 ? 4 : extra_cnt;
     score := score - 0.5 * clamped_extra_cnt;
     score := score - 0.5 * clamped_extra_cnt;
      
      
     /* Back-to-back bad words */
     /* Back-to-back bad words */
     score := score - 2.0 * (added_lines irlike ("(?:(?:" + sus + ")\W*){2}"));
     score := score - 2.0 * (added_lines irlike ("(?:(?:" + sus + ")\W*){2}"));


     /* If the word count is exactly the same, that probably means they just swapped out one word, or did a search-and-replace */
     /* If the word count is exactly the same, that probably means they just swapped out one word, or did a search-and-replace */
     score := score - 1.0 * (rcount("\w+", added_lines) == rcount("\w+", removed_lines));
     score := score - 1.0 * (rcount("\w+", added_lines) == rcount("\w+", removed_lines));


     /* Anything in all caps, not necessarily these words */
     /* Anything in all caps, not necessarily these words */
     score := score - 1.0 * (rcount("[A-Z]{4,}", added_lines) > rcount("[A-Z]{4,}", removed_lines));
     score := score - 1.0 * (rcount("[A-Z]{4,}", added_lines) > rcount("[A-Z]{4,}", removed_lines));


     if (!quote_cnt & !ref_cnt & !markup_cnt) then (
     if (!quote_cnt & !ref_cnt & !markup_cnt) then (
         /* Added plain text to the end of a single line */
         /* Added plain text to the end of a single line */
         score := score - 1.0 * (
         score := score - 1.0 * (
                 strpos(rmwhitespace(added_lines), rmwhitespace(removed_lines)) == 0 &
                 strpos(rmwhitespace(added_lines), rmwhitespace(removed_lines)) != -1 &
                 length(added_lines) == length(removed_lines)
                 length(added_lines) == 1 &
                length(removed_lines) == 1
         );
         );


         /* Added plain text to the start or end of the page */
         /* Added plain text to the start or end of the page */
         score := score - 2.0 * (strpos(rmwhitespace(new_wikitext), rmwhitespace(old_wikitext)) != -1);
         score := score - 2.0 * (strpos(rmwhitespace(new_wikitext), rmwhitespace(old_wikitext)) != -1);
       
        /* No changes at all to punctuation or markup */
        score := score - 2.0 * (str_replace_regexp(added_lines, "[\w\s]", "") == str_replace_regexp(removed_lines, "[\w\s]", ""));
     ) end;
     ) end;
   
    /* No changes at all to punctuation or markup */
    score := score - 2.0 * (str_replace_regexp(added_lines, "[\w\s]", "") == str_replace_regexp(removed_lines, "[\w\s]", ""));
      
      
     /* Not much use of the shift key */
     /* Not much use of the shift key */
     score := score - 1.0 * (rcount('[A-Z]', added_lines) == rcount('[A-Z]', removed_lines));
     score := score - 1.0 * (rcount('[A-Z]', added_lines) == rcount('[A-Z]', removed_lines));
      
      
     /* Red link containing the specific word they added */
     /* Red link containing the specific word they added */
     score := score - 2.0 * (new_html irlike ('class="new" title="[^"]*' + escaped_match));
     score := score - 2.0 * (new_html irlike ('class="new" title="[^"]*' + escaped_match));


     score < 0
     score < 0
)
)

Videos

Youtube | Vimeo | Bing

Websites

Google | Yahoo | Bing

Encyclopedia

Google | Yahoo | Bing

Facebook