Photo by Patrick Tomasso / Unsplash

Naive Bayes Classifier | Natural Language Processing In JavaScript

javascript Aug 19, 2022

NATURAL LANGUAGE PROCESSING

This article is a continuation of an earlier article; please read that one first to follow along here.

Earlier, we saw how to calculate the tf-idf of words in plain JavaScript. In this article, we will see how to use tf-idf to classify reviews as positive or negative with a technique called Naive Bayes.

Let's walk through the code step by step:

const { stopWords } = require("./StopWords");

/**
 * Small two-class text classifier (score 1 vs. score 0) built on per-review
 * tf-idf weights.
 *
 * Usage: call `train(reviews)` once with labelled reviews, then `test(sentence)`
 * for every sentence to score. Words are matched as whole tokens, never as
 * substrings, both when training and when testing.
 *
 * Relies on a `stopWords` collection being in scope (imported by the file).
 */
class NaiveBaseClassifier {
  /** @type {string[]} Unique non-stop-words seen in training, in first-seen order. */
  totalWords;
  /** @type {{word: string, reviews: number}[]} Per word: number of reviews containing it. */
  WordsWithSentences;
  /** @type {Object<string, number>[]} One row per review: word -> tf-idf weight, plus `output` (the score). */
  tfIdf;

  /**
   * Normalises raw text: lower-cases it, strips punctuation and digits,
   * collapses whitespace and drops words of one or two characters.
   *
   * @param {string} text - Raw text.
   * @returns {string} Remaining words joined by single spaces ("" when nothing is left).
   */
  static #clean(text) {
    return text
      .toLowerCase()
      .replace(/[^a-zA-Z0-9\s]/g, "")
      .replace(/\d+/g, "")
      .replace(/\s+/g, " ")
      .trim()
      .split(" ")
      .filter((word) => word.length > 2)
      .join(" ");
  }

  /**
   * Builds the vocabulary (`totalWords`) and the per-word document counts
   * (`WordsWithSentences`) from already-cleaned reviews.
   *
   * @param {{text: string, score: number}[]} reviews - Cleaned reviews whose
   *   `text` is a space-separated word list.
   */
  setDefaults(reviews) {
    const tokenLists = reviews.map((review) => review.text.split(" "));

    // "" appears when a review is empty after cleaning; it is not a word.
    this.totalWords = [
      ...new Set(
        tokenLists
          .flat()
          .filter((word) => word !== "" && !stopWords.includes(word))
      ),
    ];

    // Whole-token membership: "all" must not be counted for a review that
    // only contains "really".
    const tokenSets = tokenLists.map((tokens) => new Set(tokens));
    this.WordsWithSentences = this.totalWords.map((word) => ({
      word,
      reviews: tokenSets.filter((tokens) => tokens.has(word)).length,
    }));
  }

  /**
   * Cleans the labelled reviews and computes everything `test` needs.
   *
   * @param {{text: string, score: number}[]} reviews - Raw labelled reviews;
   *   `test` only counts rows whose score is exactly 1 or 0.
   */
  train(reviews) {
    const reviewsCleaned = reviews.map(({ text, score }) => ({
      text: NaiveBaseClassifier.#clean(text),
      score,
    }));
    this.setDefaults(reviewsCleaned);
    this.calculateTfIdfOfWords(reviewsCleaned);
  }

  /**
   * Fills `tfIdf` with one row per review. Term frequency is presence-based
   * (1 if the word occurs in the review, else 0) divided by the review's word
   * count; idf is log(number of reviews / reviews containing the word).
   *
   * NOTE: the label is stored under the key `output`, so a vocabulary word
   * that is literally "output" would be overwritten by the label.
   *
   * @param {{text: string, score: number}[]} reviews - Cleaned reviews.
   */
  calculateTfIdfOfWords(reviews) {
    const numberOfReviews = reviews.length;
    // One lookup table instead of a linear search per (review, word) pair.
    const documentFrequency = new Map(
      this.WordsWithSentences.map(({ word, reviews: count }) => [word, count])
    );

    this.tfIdf = reviews.map((review) => {
      const tokens = review.text.split(" ");
      const present = new Set(tokens);
      const weights = {};

      this.totalWords.forEach((word) => {
        const tf = (present.has(word) ? 1 : 0) / tokens.length;
        const idf = Math.log(numberOfReviews / documentFrequency.get(word));
        weights[word] = tf * idf;
      });
      weights.output = review.score;
      return weights;
    });
  }

  /**
   * Scores a sentence against the trained model.
   *
   * Starts from the class priors and, for every vocabulary word found in the
   * sentence, multiplies each class by that class's share of the training
   * reviews in which the word has a positive tf-idf weight.
   * NOTE(review): that factor is P(class | word) rather than the textbook
   * P(word | class); kept as in the original design.
   *
   * @param {string} sentence - Raw sentence; cleaned the same way as training text.
   * @returns {{text: string, probYes: number, probNo: number}} The untouched
   *   sentence and the normalised class probabilities. When the evidence
   *   cancels out completely (both products are 0) the priors are returned.
   * @throws {Error} If the classifier has not been trained on any review.
   */
  test(sentence) {
    if (!this.tfIdf?.length) {
      throw new Error(
        "NaiveBaseClassifier: train() must be called with at least one review before test()"
      );
    }

    const rowCount = this.tfIdf.length;
    const priorYes =
      this.tfIdf.filter((row) => row.output === 1).length / rowCount;
    const priorNo =
      this.tfIdf.filter((row) => row.output === 0).length / rowCount;

    const sentenceWords = new Set(
      NaiveBaseClassifier.#clean(sentence).split(" ")
    );

    let proby = priorYes;
    let probn = priorNo;
    this.totalWords.forEach((word) => {
      if (!sentenceWords.has(word)) {
        return;
      }

      let withWord = 0;
      let withWordYes = 0;
      let withWordNo = 0;
      for (const row of this.tfIdf) {
        if (!(row[word] > 0)) {
          continue;
        }
        withWord += 1;
        if (row.output === 1) {
          withWordYes += 1;
        } else if (row.output === 0) {
          withWordNo += 1;
        }
      }

      // A word present in every review has idf 0 and therefore no positive
      // weight anywhere; it carries no evidence, so skip it instead of
      // multiplying by 0 / 0.
      if (withWord === 0) {
        return;
      }
      proby *= withWordYes / withWord;
      probn *= withWordNo / withWord;
    });

    // Normalise both values with the SAME denominator so they sum to 1.
    const evidence = proby + probn;
    if (evidence === 0) {
      return { text: sentence, probYes: priorYes, probNo: priorNo };
    }
    return {
      text: sentence,
      probYes: proby / evidence,
      probNo: probn / evidence,
    };
  }
}

module.exports = NaiveBaseClassifier;

Now we will use our classifier to predict whether a given review is positive or negative:

const NaiveBaseClassifier = require("./naiveBaseClassifier");

// Labelled sample reviews used to train the classifier.
// Each entry has the raw review `text` and a `score`:
// 1 for a positive review, 0 for a negative one.
const review = [
  {
    text: "I got a defective piece. Since I was not at my home during the delivery period, I couldn't ask for replacement. I'm trying to claim warranty, but I didn't get any assistance.",
    score: 0,
  },
  {
    text: "This bag is of the lowest quality you can imagine. I am sure it's a duplicate one and not original. The inner partitions are so thin that they will tear off after 3-4 use. Returning the item.",
    score: 0,
  },
  {
    text: "The stiches of the bag is started coming out in 6 months. Feel the cloth is of poor quality. how to return the bag for availing the warranty, can anyone please advise",
    score: 0,
  },
  { text: "Very nice bag .... Very light....  And spacious. ...", score: 1 },
  {
    text: "Product is good with classy looks...Have enough space for 3 days travel..Fabric should have been even better",
    score: 1,
  },
  {
    text: "I got this for 803 ...at this price it's great .  I have got this right now ...looking great ..I feel that it will be robust in long term usage ..if not I will right and decrease the rating ...but for now it's amazing Amazon!!!",
    score: 1,
  },
  {
    text: "This backpack is amazing..  First I ordered the same bag with grey colour from Flipkart but the price was 1234.56 rupees but when I searched this product from Amazon price was 920 only.... And also delivery time was osm. U can go through with this bag for travelling ,school ,college nd etc. U all can judge from pics",
    score: 1,
  },
  {
    text: "Okk super product but the material is not like shown in picture but overall I review it 4.5 out of 5 stars nyc item and also it has very good storage capacity",
    score: 1,
  },
  { text: "Bag is quite good.. original american tourister", score: 1 },
  { text: "Very good quality material and looks good.", score: 1 },
  {
    text: "Build quality is excellent. The bag is water resistant. The graphics on bag make the more attractive. I'm totally satisfied with this backbag. It is very very light weight. Thank you American Tourister for manufacturing this marvellous product",
    score: 1,
  },
  {
    text: "I got this bag just a moment ago..  And my reaction was what the hell.. it looks like a copy and the material quality is really cheap..  it doesn’t look like an original American tourister..  Amazon is selling fake products",
    score: 0,
  },
  {
    text: "This bag was triple folded when it was delivered to me making its front part look ugly permanantly . The front part there fades aways after a few days or so . Its inner part is made of cheap polymer",
    score: 0,
  },
  { text: "Very low quality bag", score: 0 },
  {
    text: "As a brand of AT, i thought that it would give something which match the brand value. But it was a disappointment for me as the quality of the material used in this is very cheap. Even the cloth which is used in this as a partition for different section is also very thin. Personally I don't like this product",
    score: 0,
  },
  {
    text: "It's not original American tourister, the quality of bag is not good and very thin. Not for regular usage. Not worth buying.",
    score: 0,
  },
  {
    text: "The American Tourister bags quality is really going down. I have been using a bag from American Tourister bag for like 4 years and quality is really good but now news bags quality and colouris not worth buying anymore.",
    score: 0,
  },
  {
    text: "Looks good on images when the product arrived was.very dissatisfied as this is a first copy item not the original. Padding in the bags were poor and stick was imperfect. This can't be a original American touristed",
    score: 0,
  },

  // NOTE(review): same text and score as the second entry of this array, so
  // this review is counted twice during training — confirm that is intended.
  {
    text: "This bag is of the lowest quality you can imagine. I am sure it's a duplicate one and not original. The inner partitions are so thin that they will tear off after 3-4 use. Returning the item.",
    score: 0,
  },
  {
    text: "Average quality product, i am astonished to know that big brands are also producing this kind of quality",
    score: 0,
  },
  {
    text: "I'm writing this review after almost a year of purchase. Everything is fine except their claim of being water-resistant.",
    score: 0,
  },
];

// Train on the labelled reviews. The classifier takes its stop words from
// "./StopWords", so no stop-word list is declared here.
const classifier = new NaiveBaseClassifier();
classifier.train(review);

// Score two unseen sentences; each result holds the sentence (`text`) and
// the two class probabilities (`probYes`, `probNo`).
const result = classifier.test("this is amazing product");
const result1 = classifier.test("this product is really good");

console.log("result", result);
console.log("result", result1);

Thanks for reading this article

Tags