"Word sense disambiguation is a crucial task in NLP as it helps determine the intended meaning of a word with multiple senses in a given context. Choosing the right method depends on various factors like the availability of labeled data, computational resources, and the desired level of accuracy and interpretability. Often, a combination of approaches can be used to achieve the best results."- Gemini 2024
The word "bank" can have multiple meanings: a financial institution or the edge of a river. Determine the correct meaning in the following sentence:
import re

import nltk
from nltk.corpus import wordnet

# Function words that appear in almost every sentence and gloss; counting
# them as overlap rewards long definitions rather than related ones.
_STOP_WORDS = frozenset({
    "a", "about", "an", "and", "are", "as", "at", "be", "been", "by", "for",
    "from", "has", "have", "he", "i", "in", "into", "is", "it", "its", "my",
    "of", "on", "or", "so", "than", "that", "the", "their", "they", "this",
    "to", "was", "we", "were", "which", "with", "you",
})

_WORD_RE = re.compile(r"\w+")


def _content_words(text):
    """Return the set of lower-cased, punctuation-free content words in *text*."""
    return set(_WORD_RE.findall(text.lower())) - _STOP_WORDS


def lesk(sentence, word):
    """Pick the WordNet sense of *word* whose definition best overlaps *sentence*.

    A simplified Lesk algorithm: each candidate synset is scored by the number
    of content words its definition shares with the sentence.  Tokens are
    lower-cased and stripped of punctuation, stop words are ignored, and the
    target word itself is excluded from the context so that a definition is
    not rewarded merely for mentioning the word being disambiguated.

    Args:
        sentence: The context in which ``word`` occurs.
        word: The ambiguous word to look up in WordNet.

    Returns:
        The first synset with the highest non-zero overlap, or ``None`` when
        WordNet has no synsets for ``word`` or no definition shares a content
        word with the sentence.

    Note:
        Requires the WordNet corpus to be installed
        (e.g. via ``nltk.download('wordnet')``).
    """
    # Tokenise the sentence once; it does not change between senses.
    context = _content_words(sentence) - set(_WORD_RE.findall(word.lower()))

    best_sense = None
    max_overlap = 0
    for sense in wordnet.synsets(word):
        overlap = len(context & _content_words(sense.definition()))
        # Strict '>' keeps the earliest sense on ties (WordNet's own order).
        if overlap > max_overlap:
            max_overlap = overlap
            best_sense = sense
    return best_sense


sentence = "I went to the bank to deposit money."
word = "bank"
sense = lesk(sentence, word)
print(sense)  # > Synset('depository_financial_institution.n.01')  (WordNet 3.0)
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Supervised word sense disambiguation demo: classify which sense of "bank"
# a sentence uses, from bag-of-words counts fed to a multinomial Naive Bayes
# classifier.

# Sample data (trivial example)
data = {
    'sentence': [
        'I went to the bank to deposit money.',
        'The river bank is beautiful.',
        'The bank loan was approved.',
    ],
    'sense': [
        'financial institution',
        'river edge',
        'financial institution',
    ],
}
df = pd.DataFrame(data)

# Raw sentences and their sense labels
X = df['sentence']
y = df['sense']

# Split the raw text BEFORE vectorizing: fitting the vectorizer on the whole
# corpus would let the held-out sentences shape the vocabulary (data leakage).
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the vocabulary from the training split only, then reuse it as-is for
# the held-out split and for any new sentence.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)  # kept for evaluation; unused below

# Create and train the classifier
clf = MultinomialNB()
clf.fit(X_train, y_train)

# NOTE(review): with only three labelled sentences, part of which is held
# out, the printed label is illustrative only -- not evidence of accuracy.

# Predict the sense for a new sentence
new_sentence = "I need to go to the bank to withdraw cash."
new_sentence_vec = vectorizer.transform([new_sentence])
predicted_sense = clf.predict(new_sentence_vec)[0]
print(predicted_sense)  # > financial institution