explain each line import numpy as np import pandas as pd yelp = pd.read_csv('yelp.csv') yelp.head() #show the first 5 ro

Business, Finance, Economics, Accounting, Operations Management, Computer Science, Electrical Engineering, Mechanical Engineering, Civil Engineering, Chemical Engineering, Algebra, Precalculus, Statistics and Probability, Advanced Math, Physics, Chemistry, Biology, Nursing, Psychology, Certifications, Tests, Prep, and more.
Post Reply
answerhappygod
Site Admin
Posts: 899603
Joined: Mon Aug 02, 2021 8:13 am

explain each line import numpy as np import pandas as pd yelp = pd.read_csv('yelp.csv') yelp.head() #show the first 5 ro

Post by answerhappygod »

explain each line
import numpy as np
import pandas as pd
yelp = pd.read_csv('yelp.csv')
yelp.head() #show the first 5 rows
yelp.info()
yelp.describe()
yelp['text length'] = yelp['text'].apply(len)
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('white')
%matplotlib inline
g = sns.FacetGrid(yelp,col='stars')
g.map(plt.hist,'text length')
sns.boxplot(x='stars',y='text length',data=yelp,palette='rainbow')
sns.countplot(x='stars',data=yelp,palette='rainbow')
stars = yelp.groupby('stars').mean()
stars
stars.corr()
sns.heatmap(stars.corr(),cmap='coolwarm',annot=True)
yelp_class = yelp[(yelp.stars==1) | (yelp.stars==5)]
X = yelp_class['text'] #create object colled x contain
text column of yelp_class
y = yelp_class['stars']
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer()
X = cv.fit_transform(X) Use the fit_transform method
on the CountVectorizer object and pass in X (the 'text' column)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.3,random_state=101)
from sklearn.naive_bayes import MultinomialNB
nb = MultinomialNB()
nb.fit(X_train,y_train)
predictions = nb.predict(X_test)
from sklearn.metrics import confusion_matrix,classification_report
print(confusion_matrix(y_test,predictions))
print('\n')
print(classification_report(y_test,predictions))
from sklearn.feature_extraction.text import TfidfTransformer
# import TfidfTransformer from sklearn which used to
convert a collection of raw documents to a matrix of TF-IDF
features.
from sklearn.pipeline import Pipeline # Import
Pipeline from sklearn
pipeline = Pipeline([
('bow', CountVectorizer()),
('tfidf', TfidfTransformer()),
('classifier', MultinomialNB()),
])
Join a community of subject matter experts. Register for FREE to view solutions and replies, and to use the search function. Request an answer by replying!
Post Reply