��/��/��ţ

��

NLP�е�Ƕ��;��

��Դ�� - deephub

��Ľ��о�Ƕ�롢ʸ��ݿ�͸��־��ĸ����ṩʾ��ʾ��롣

NLP�е�Ƕ��

Ƕ��ռ��ж��󡢵��ʻ�ʵ��ֵ��ʾ��NLP�У��Ƕ�벶��֮��ϵ��ʹ�㷨�ܹ��õ��ı��ĺͺ��塣

��һ��Ӻ�һЩ��ӻ��ķ��6��ӣ��Ҫ��Ƕ��

from sentence_transformers import SentenceTransformer

# Load a pretrained sentence-embedding model by name.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Sentences we want to encode. Several deliberately share a topic (the hike,
# the national park) and one is unrelated, so that the later 2D plot has
# something to compare.
sentence = [
    'The team enjoyed the hike through the meadow',
    'The team enjoyed the hike through the mountains',
    'The team has not enjoyed the hike through the meadows',
    'The national park had great views',
    'There were lot of rare animals in national park',
    'Olive oil drizzled over pizza tastes delicious',
]

# Sentences are encoded by calling model.encode() on the whole list; the
# result holds one embedding vector per input sentence.
embedding = model.encode(sentence)

# Preview the embeddings
print(embedding)

# As we can see embeddings are nothing but

# numerical representation of sentences in a vector form

[[ 0.37061948  0.26414198  0.21265635 ...  0.14994518 -0.25794953
  -0.23970771]
 [-0.07645706  0.27122658 -0.04530133 ... -0.27319074 -0.60025024
  -0.302555  ]
 [ 0.35693657 -0.2331443   0.418002   ... -0.37369558 -0.10241977
  -0.03282997]
 [ 0.66933334  0.40094966 -0.48208416 ...  0.10645878 -1.5067163
  -0.01547382]
 [ 0.4339616   0.2323563   0.21751338 ... -0.5746389  -0.26438454
   0.492655  ]
 [-0.2655593   0.11172348 -0.1473308  ...  0.42197517  0.88394576
   0.10763898]]

��Կ��ǻ��һ��384ά�ȵ�Ƕ��

embedding.shape

(6, 384)

Ȼ��ʹ��PCA��н�ά��ʹ��matplot��ӻ�

from sklearn.decomposition import PCA

# Fit a two-component PCA on the sentence embeddings for 2D visualization.
# fit() returns the fitted estimator itself, so it is chained onto the
# constructor and the fitted model is what gets bound to PCA_model.
PCA_model = PCA(n_components=2).fit(embedding)

# Project the same embeddings onto the two fitted components.
new_embeddings = PCA_model.transform(embedding)

# As we can see now the shape has changed from (6,384)->(6,2)

Shape: (6, 2)

[[-2.7454064 -1.628386 ]

[-2.7024133 -2.0113547 ]

[-2.6084075 -2.5289955 ]

[ 0.62488586 3.9073005 ]

[ 0.09110744 4.9031897 ]

[ 7.3402357 -2.6417546 ]]

PCA��ά�Ƚ�ά2 ��ʾ��2D��ɢ��ͼ��

import matplotlib.pyplot as plt

import mplcursors

def plot2d(x_values, y_values, text_labels):
    """
    Create a 2D scatter plot with a text annotation on each point.

    A new figure is created, the points are drawn, each point listed in
    ``text_labels`` is annotated with its label, hover cursors are enabled
    through mplcursors, and the figure is shown with ``plt.show()``.
    Nothing is returned.

    Parameters:
    - x_values (array): X-axis values.
    - y_values (array): Y-axis values.
    - text_labels (list): List of text labels for each point; label ``i``
      is placed at ``(x_values[i], y_values[i])``.
    """
    fig, ax = plt.subplots()
    ax.scatter(x_values, y_values, label='Data Points')

    # Annotate points with text labels
    for i, label in enumerate(text_labels):
        ax.annotate(label, (x_values[i], y_values[i]))

    # Enable hover tooltips; called without explicit artists, exactly as in
    # the original snippet.
    mplcursors.cursor(hover=True)

    ax.set_xlabel('X-axis')
    ax.set_ylabel('Y-axis')
    ax.set_title('2D Plot with Annotations')

    plt.show()

��

import matplotlib.pyplot as plt

import mplcursors

# Plot the PCA-reduced embeddings: column 0 supplies the x coordinates,
# column 1 the y coordinates, and the original sentences are used as labels.

plot2d(new_embeddings[:,0], new_embeddings[:,1], sentence)

��Կ��˴��Ƶľ��ӱ�ͶӰ��˴˸��ʵ�ʺ��塣��磬�ᵽ��National Park��ľ��ӱ˴˿��ø��̸�ۡ�hiking��ľ��ӱ˴˿��ø��

��ζ��Щ��Ƕ��֮��ľ��?

�кܶ෽��Լ��֮��ľ��룬��ǽ��ʸ��ݿ��г��õ�4�־��

1��ŷ�Ͼ��

��֮��·��ĳ��

ŷ��þ��⣬��Ϊ��ռ��֮��ֱ�߾��롣��ʸ��Ԫ��֮��Ĵ�С�;��Բ��Ҫ��

��ŷ�Ͼ��ݵĳ߶Ⱥ��С��в�ͬ�ĳ߶ȣ��ܱ��߶Ƚϴ��ڸ�ά�ռ��У�ŷ��þ��ܱ�ò��ô��壬��Ϊ��ڡ�ά��䡱��

��Ƕ�ά�ռ��е��:A(1,2)��B (4,6)�� A��B֮��ŷ�Ͼ��Ϊ:

2��Ҿ��

��ʸ��֮��ķ��Բ��

��Ҿ��벻��С��Ӱ�죬ʹ��Գ߶Ȳ��³��ԣ��ǳ��ʺϸ�ά�ռ䡣

��Ҿ��ֻ��ķ��򣬶��ǵĴ�С��ʸ��ƣ��ܴ��ô��ǵ��ƶȿ��ܽӽ��0��

3��Jaccard

Jaccardϵ��֮��ԣ��Ϊ��Ĵ�С��Ĵ�С:

�ڴ��ϻ��Ԫ��ʱ��Jaccard�ر��ã�ʹ��ı��ĵ��Ƚϵȳ��ƶ�һ��Jaccard�Դ�С��С�

��Ҫ��Ϊ��Ԫ��Ƶģ��ܲ��ʺ��ݻ��С��Ϣ��Ҫ��϶�Ϊ��ʱ��Jaccard��ƶ��δ��ġ�

4��پ��

��پ��룬Ҳ��ΪL1��⳵��룬��ϵͳ��֮��ľ��룬ֻ��ˮƽ�ʹ�ֱ�˶��

��پ��ֱ��ڽ��͡��Ӧ�ڳ��⳵��״��·ϵͳ��ʻ�ľ��룬��ˮƽ�ʹ�ֱ��ʻ�Ե��Ŀ�ĵء��ּ��ʹ��ڿɽ��Ҫ�ĳ��ر��á�

��پ��ڴ��Ա��ʽ��ʾ�Ļ��ϵͳ��ݼ�ʱ�ǳ��á��ǳ��ʺ��ƶ��ĳ��ͼ��С�

��پ��һ��ش�ȱ��ݹ�ģ��ԡ��в�ͬ�ĳ߶ȣ��ܱ��߶Ƚϴ��ڴ��С��ͬ��ݼ�ʱ��ܵ��´��Ž��

��پ��뱾��ˮƽ�ʹ�ֱ��˶��֮��ĶԽ��ƶ��Թ�ϵ��Ҫ��£��پ��޷�׼ȷ��е�Ǳ��ģʽ��

��ƶ�vs��ƶ�

��һ��Ȥ��Ϊʲô��Ҷ��ƶ��ʸ��롣

��ƶȱ��ƶȸ��ڲ��֮��ƶȡ��Һ��ƶ�֮��ѡ��ȡ��ݵ��ʺ��ľ��Ҫ��̽��һ��Ϊʲô��ƶ��ѡ:

��ƶȶ��ʸ��֮��ļн��ֵ��ƶȶ��Ǽнǵ��ֵ��ƶȸ�ֱ�ӵر�ʾʸ��ָ��ƶȣ��ƶ��һЩ��¿��ܲ��ƶ�ֱ�ۡ�

��ƶȵļ��а��ʸ��ڻ��ƶ��漰��ʸ��ڸ�ά�ռ��У�ʸ��֮��ڻ��׼��㣬��漰��ӵļ��㡣��ƶȵļ��ʵ��Ӧ��и�Ϊ��Ч��

��ƶ��֮��ĽǶ�ֱ��ء��ƶ�Ϊ1��ʾ��ָ��ͬ�ķ��򣬶��ƶ�Ϊ-1��ʾ��ָ��෴�ķ��ƶ�Ϊ0��ʾ��ԡ��ֱ�۵Ľ��Ӧ�ó��ǳ��Ǻϡ�

��ƶ��ʵ��в�̫��нǵ��ֵ��ǿ��ƶȵĴ�ֱ����Ȼ��ƶȿ��ض��紹ֱ��Ҫ�ĳ��ƶ��ڹ淶��ͺ�Ч�ʷ��ƶ��㷺�ز��á�

�ܽ�

��̽��Ƕ��;��ĸ����Щ��ڸ��򹹽��߼�NLPģ�ͺ�Ӧ�ó��Ҫ��Ϊ��ʵ��Ӧ��Ҫ��ǳ��Բ�ͬ��ģ�͡��ݿ��ָ�꣬��ض��Ż��ܡ�

��: 2024-02-062024-02-06 09:37:58
ԭ��https://page.om.qq.com/page/OCdHZ2D93K46aP1ShBYaEt5A0
��Ѷ��Ѷ�ƿ��Ѷ��ݿ��ƽ̨�ʺţ��ţ��֮һ��Ѷ��ݿ��ƽ̨��Э�顷ת�ط��ݡ�
��Ȩ��ϵ cloudcommunity@tencent.com ɾ��

��Ѷ

ɨ��

��վ�� Ⱥ

��ȡר�� 10Ԫ��ż�ȯ

˽�� ��ɻ�

NLP�е�Ƕ��;��

��Ѷ

ɨ��

��

�

��Դ

��

��Ѷ�ƿ��

��Ų�Ʒ

��Ƽ�

��Ƽ�

NLP�е�Ƕ��;������

�����Ѷ

����

�

��Դ

����

��Ѷ�ƿ�����

���Ų�Ʒ

�����Ƽ�

�����Ƽ�

NLP�е�Ƕ��;��

��Ѷ

��

��

��Ѷ�ƿ��

��Ų�Ʒ

��Ƽ�

��Ƽ�