User:Phlsph7/HighlightUnreferencedPassages.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/*** Highlight unreferenced passages ***/
(function(){
	// finds passages that lack references and marks them
	function markUnreferencedPassages(){
		// Adds the stylesheet that colors passages lacking references.
		// The <style> element gets a fixed id and is only inserted once, so running
		// the script repeatedly (e.g. several clicks on the toolbox link) does not
		// pile up identical stylesheets in the document head.
		function addStylesheet(){
			const stylesheetId = 'highlight-unreferenced-passages-style';
			if(document.getElementById(stylesheetId) != null){
				return;
			}

			const stylesheet = document.createElement('style');
			stylesheet.id = stylesheetId;
			// plain CSS text: textContent is enough and does not involve the HTML parser
			stylesheet.textContent = `
			.has-no-references, .Template-Fact {
				background: LightPink;
			}
			`;
			document.head.appendChild(stylesheet);
		}
		
		// Decides whether an element has to be checked for references at all.
		//   element          - the paragraph or list entry under inspection
		//   excludedSections - array of section names that are not checked
		// Returns false for elements inside one of the ignored containers or inside
		// an excluded section, true for everything else.
		function isEligible(element, excludedSections){
			// navboxes, sidebars, infoboxes and similar containers do not need references
			const ignoredContainerClasses = [
				'navbox',
				'sidebar',
				'infobox',
				'side-box-flex',
				'noprint',
				'refbegin',
				'gallery',
				'toc',
				'reflist'
			];
			const isInsideIgnoredContainer = ignoredContainerClasses.some(function(containerClass){
				return hasParentClass(element, containerClass);
			});
			if(isInsideIgnoredContainer){
				return false;
			}

			// the section is only looked up when no ignored container matched
			return !excludedSections.includes(getSectionName(element));
		}
		
		// Utility: reports whether element.closest() finds a match for the class,
		// i.e. whether the element itself or one of its ancestors carries it.
		function hasParentClass(element, className){
			const selector = `.${className}`;
			const nearestMatch = element.closest(selector);
			return nearestMatch !== null && nearestMatch !== undefined;
		}
		
		// Utility: returns the name of the section an element belongs to.
		// The empty string stands for the lead (no level-2 heading precedes the element).
		//
		// Starting from the element's ancestor that is a grandchild of the main
		// container, the function walks back through the previous siblings until it
		// finds a level-2 heading. Two markups are recognised:
		//   - a bare <h2> sibling (older heading markup)
		//   - a wrapper carrying the class "mw-heading2" that contains the <h2>
		function getSectionName(element){
			const mainContainerChildElement = getGrandchildOfMainContainer(element);
			let previousElement = mainContainerChildElement.previousElementSibling;

			while(previousElement != null){
				let heading = null;
				if(previousElement.tagName === 'H2'){
					heading = previousElement;
				}
				else if(previousElement.classList != null && previousElement.classList.contains('mw-heading2')){
					// prefer the inner <h2>: the wrapper's text may also contain the edit link
					heading = previousElement.querySelector('h2') || previousElement;
				}

				if(heading != null){
					// drop the "[edit]" link text and surrounding whitespace
					return heading.innerText.split('[edit]').join('').trim();
				}
				previousElement = previousElement.previousElementSibling;
			}

			// no heading found: the element is part of the lead
			return '';
		}
		
		// Utility: for any element, returns the element itself or the ancestor whose
		// grandparent is the main container (#mw-content-text).
		// If the chain of parents ends without reaching such an ancestor (for example
		// when the element is a direct child of the main container), the element
		// itself is returned instead of failing on a missing parent.
		function getGrandchildOfMainContainer(element){
			const mainContainer = document.getElementById('mw-content-text');

			let candidate = element;
			while(candidate != null && candidate.parentElement != null){
				if(candidate.parentElement.parentElement === mainContainer){
					return candidate;
				}
				candidate = candidate.parentElement;
			}

			// ran out of ancestors: fall back to the element that was passed in
			return element;
		}
		
		// Utility: decides whether the lead section should be checked for references.
		// Returns true when
		//   - the page title carries the namespace label "User" or "Draft", or
		//   - the page contains an element with the class "stub";
		// returns false otherwise. The result is always a boolean, and the stub check
		// also runs when a namespace label other than "User"/"Draft" is present.
		function shouldMarkLead(){
			const pageTitleNamespace = document.getElementsByClassName('mw-page-title-namespace')[0];
			if(pageTitleNamespace != null){
				const namespaceName = pageTitleNamespace.innerText;
				if(namespaceName === 'User' || namespaceName === 'Draft'){
					return true;
				}
			}

			return document.getElementsByClassName('stub').length > 0;
		}
		
		// Highlights the unreferenced parts of an element.
		// An element without any reference is highlighted as a whole. Otherwise the
		// highlighting runs backwards from the element's last child and from every
		// "citation needed" tag until a reference is reached.
		function mark(element){
			const containsReference = element.getElementsByClassName('reference').length != 0;
			if(!containsReference){
				element.classList.add("has-no-references");
				return;
			}

			// trailing content after the last reference
			markUntilPreviousReference(element.lastChild);

			// content in front of each "citation needed" tag
			const factTags = element.getElementsByClassName('Template-Fact');
			for(let index = 0; index < factTags.length; index++){
				markUntilPreviousReference(factTags[index]);
			}
		}
		
		// Marks unreferenced passages. Starting at childNode, the function walks back
		// through the previous siblings and highlights every node until it hits an
		// actual reference.
		//   - text nodes are replaced by a highlighted <span> holding the same text
		//   - elements get the class "has-no-references"
		//   - explanatory footnotes (reference elements that are not actual
		//     references) are highlighted as well and do not stop the walk
		//   - other node types (e.g. comments) are skipped untouched
		function markUntilPreviousReference(childNode){
			const TEXT_NODE = 3; // numeric value of Node.TEXT_NODE

			// actual references contain a number and neither "note" nor "nb"
			function isActualReference(node){
				const label = node.innerText;
				if(label == null || !/[0-9]/.test(label)){
					return false;
				}
				const lowerCaseLabel = label.toLowerCase();
				return !lowerCaseLabel.includes('note') && !lowerCaseLabel.includes('nb');
			}

			let currentNode = childNode;
			while(currentNode != null){
				// nodes that are not HTML elements have no classList
				if(currentNode.classList == null){
					if(currentNode.nodeType === TEXT_NODE && currentNode.parentNode != null){
						const span = document.createElement('span');
						span.classList.add('has-no-references');

						// textContent keeps the text literal; assigning it as HTML would
						// re-interpret characters such as "<" and "&" as markup
						span.textContent = currentNode.data;
						currentNode.parentNode.replaceChild(span, currentNode);
						currentNode = span;
					}
				}

				// an actual reference ends the unreferenced passage
				else if(currentNode.classList.contains('reference') && isActualReference(currentNode)){
					break;
				}

				// any other element, including explanatory footnotes, is highlighted
				else {
					currentNode.classList.add('has-no-references');
				}

				// continue with the previous node
				currentNode = currentNode.previousSibling;
			}
		}
		
		// Removes the highlight from elements that were falsely highlighted.
		// Every pass works on a snapshot of the highlighted elements:
		// getElementsByClassName returns a live collection that shrinks as soon as the
		// class is removed from one of its members, so iterating it directly while
		// removing the class would skip the element following each removal.
		function excludeFalsePositives(){
			const highlightClass = 'has-no-references';

			// static copy of the currently highlighted elements
			function getHighlightedElements(){
				return Array.from(document.getElementsByClassName(highlightClass));
			}

			function removeHighlight(element){
				element.classList.remove(highlightClass);
			}

			// references used in nested lists: an element that contains a reference
			// somewhere inside is not unreferenced
			for(const highlighted of getHighlightedElements()){
				if(highlighted.getElementsByClassName('reference').length > 0){
					removeHighlight(highlighted);
				}
			}

			// quoteboxes that carry a citation
			for(const highlighted of getHighlightedElements()){
				const quoteboxParent = highlighted.closest('.quotebox');
				if(quoteboxParent != null && quoteboxParent.getElementsByTagName('cite').length > 0){
					removeHighlight(highlighted);
				}
			}

			// empty elements
			for(const highlighted of getHighlightedElements()){
				if(highlighted.innerHTML === "\n" || highlighted.innerHTML === " \n"){
					removeHighlight(highlighted);
				}
			}

			// the template {{rp}}: reference elements themselves are never highlighted
			for(const referenceElement of Array.from(document.getElementsByClassName('reference'))){
				removeHighlight(referenceElement);
			}

			// blockquotes often use a different reference style: a paragraph is not
			// unreferenced if its parent blockquote contains a citation element
			for(const paragraph of document.querySelectorAll('blockquote > p.has-no-references')){
				if(paragraph.parentElement.getElementsByClassName('templatequotecite').length > 0){
					removeHighlight(paragraph);
				}
			}

			// elements in the template "ombox"
			for(const elementInOmbox of document.querySelectorAll('.ombox .has-no-references')){
				removeHighlight(elementInOmbox);
			}

			// for drafts: exclude comments, i.e. highlighted elements that contain a
			// ".localcomments" element
			const pageTitleNamespace = document.getElementsByClassName('mw-page-title-namespace')[0];
			if(pageTitleNamespace != null
				&& (pageTitleNamespace.innerText === 'User' || pageTitleNamespace.innerText === 'Draft')){
				for(const highlighted of getHighlightedElements()){
					if(highlighted.querySelector('.localcomments') != null){
						removeHighlight(highlighted);
					}
				}
			}
		}
		
		// the highlight colors have to be available before anything is marked
		addStylesheet();

		// candidates: all paragraphs and list entries of the content area should have references
		const contentRoot = document.getElementById('mw-content-text');
		const candidates = [
			...contentRoot.getElementsByTagName('p'),
			...contentRoot.getElementsByTagName('li')
		];

		// sections that are not checked for references
		const sectionsWithoutReferenceCheck = [
			'Plot', 'Plots', 'Plot summary', 'Plot synopsis', 'Synopsis', 'Storylines',
			'Further reading', 'See also', 'External links', 'References', 'Bibliography',
			'Notes', 'Selected publications', 'Selected works', 'Cited sources', 'Sources',
			'Footnotes'
		];

		// decide whether the lead should be checked for references;
		// the lead is represented by the empty section name
		const leadIsChecked = shouldMarkLead();
		if(!leadIsChecked){
			sectionsWithoutReferenceCheck.push('');
		}

		// mark every eligible candidate that lacks references
		for(const candidate of candidates){
			if(!isEligible(candidate, sectionsWithoutReferenceCheck)){
				continue;
			}
			mark(candidate);
		}

		// finally undo the highlighting that is known to be wrong
		excludeFalsePositives();
	}
	
	// restrict script to mainspace, userspace, and draftspace
	const NAMESPACE_MAIN = 0;
	const NAMESPACE_USER = 2;
	const NAMESPACE_DRAFT = 118;
	const namespaceNumber = mw.config.get('wgNamespaceNumber');

	if (namespaceNumber === NAMESPACE_MAIN || namespaceNumber === NAMESPACE_USER || namespaceNumber === NAMESPACE_DRAFT) {
		// add a link to the toolbox once mediawiki.util is loaded and the DOM is ready
		$.when(mw.loader.using('mediawiki.util'), $.ready).then(function (){
			const portletlink = mw.util.addPortletLink('p-tb', '#', 'Highlight unreferenced passages');

			// addPortletLink returns null when the link could not be added
			// (e.g. the skin has no such portlet); there is nothing to wire up then
			if (portletlink == null) {
				return;
			}

			// run the main function when the link is clicked
			portletlink.onclick = function(e) {
				e.preventDefault();
				markUnreferencedPassages();
				mw.notify('Highlighting finished.');
			};
		});
	}

	// automatic mode (mainspace and draftspace only): enabled by setting the global
	// variable highlightUnreferencedPassagesAutomatic to true
	if (namespaceNumber === NAMESPACE_MAIN || namespaceNumber === NAMESPACE_DRAFT) {
		if(typeof highlightUnreferencedPassagesAutomatic != 'undefined' && highlightUnreferencedPassagesAutomatic == true){
			// wait for the DOM: the main function reads the page content
			$.when($.ready).then(function (){
				markUnreferencedPassages();
			});
		}
	}
})();