# Code for WP posting: redirect the top window to the post's preview URL
<script>
var myUrl = 'http://analytics.business/?p=153&preview=true';
if (window.top.location.href !== myUrl) {
    window.top.location.href = myUrl;
}
</script>
# Analyze an article: clean text, keywords, counts, lexical dispersion & diversity
# April 11, 2017, Christa Taylor
# Reference article: http://www.cbc.ca/news/technology/trump-climate-change-executive-order-1.4043650
import os
import sys
import pickle
import numpy
import pandas as pd
import matplotlib

import nltk
from nltk.collocations import *
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# One-time downloads of the NLTK resources used below
nltk.download('maxent_ne_chunker')
nltk.download('words')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('stopwords')
nltk.download('genesis')
nltk.download('wordnet')
nltk.download('vader_lexicon')
os.chdir('d:/text/')  # working folder
with open('cbcnewstrump.txt', 'r', encoding='utf-8') as f:  # article text file
    sample = f.read()
# Split into sentences, tokenize, POS-tag, then chunk named entities
# (binary=True labels every named entity simply as 'NE')
sentences = nltk.sent_tokenize(sample)
tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)
def extract_entity_names(t):
    """Recursively collect 'NE'-labelled subtrees from a chunked sentence tree."""
    entity_names = []
    if hasattr(t, 'label') and t.label:
        if t.label() == 'NE':
            entity_names.append(' '.join(child[0] for child in t))
        else:
            for child in t:
                entity_names.extend(extract_entity_names(child))
    return entity_names
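# As a quick illustration, the helper can be exercised on a single chunked sentence
# (the sentence below is made up for the example, not taken from the article):
demo_tree = nltk.ne_chunk(
    nltk.pos_tag(nltk.word_tokenize("Donald Trump visited the Environmental Protection Agency.")),
    binary=True)
print(extract_entity_names(demo_tree))  # e.g. ['Donald Trump', 'Environmental Protection Agency']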
entity_names = []
for tree in chunked_sentences:
    # Per-sentence results: print(extract_entity_names(tree))
    entity_names.extend(extract_entity_names(tree))

# Print unique entity names
print(set(entity_names))
# Output (set order may vary):
# {'climate change', 'Trump', 'American', 'power plant', 'wind power industry',
#  'Environmental', 'coal miner', 'jobs', 'Environmental Protection Agency',
#  'America', 'Clean Power Plan', 'order'}
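# The header also promises counts, lexical dispersion, and diversity. A minimal sketch
# of those measures on the same `sample` text (the keyword list is illustrative):
words = [w.lower() for w in nltk.word_tokenize(sample) if w.isalpha()]
fdist = nltk.FreqDist(words)
print(fdist.most_common(10))  # top-10 word counts
print("Lexical diversity:", len(set(words)) / len(words))  # type-token ratio
nltk.Text(nltk.word_tokenize(sample)).dispersion_plot(['Trump', 'climate', 'coal', 'order'])  # needs matplotlib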
## Sentiment
## Using VADER: http://www.nltk.org/_modules/nltk/sentiment/vader.html
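# polarity_scores returns a dict with four keys: 'neg', 'neu', 'pos', and 'compound'
# (a normalized score in [-1, 1]); a one-off check on a made-up sentence:
print(SentimentIntensityAnalyzer().polarity_scores("The plan is a great step forward."))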
sent = pd.DataFrame(index=range(len(sentences)),
                    columns=["full_sentence", "compound", "negative", "neutral", "positive"])
sid = SentimentIntensityAnalyzer()
for i, sentence in enumerate(sentences):
    ss = sid.polarity_scores(sentence)
    sent.loc[i, "full_sentence"] = sentence
    sent.loc[i, "compound"] = ss['compound']
    sent.loc[i, "negative"] = ss['neg']
    sent.loc[i, "positive"] = ss['pos']
    sent.loc[i, "neutral"] = ss['neu']
# print(sent)
print("Mean Neutral Score:", sent["neutral"].mean())
print("Mean Positive Score:", sent["positive"].mean())
print("Mean Negative Score:", sent["negative"].mean())