有个文章自动内链的问题，有兴趣的一起探讨下

独家记忆 · 发表于 2024-1-30 08:10:17

作用不大，词多了整篇文章都是超链接

Crystαl · 发表于 2024-1-30 08:10:58

使用jieba-php库进行中文分词

1. 首先，安装jieba-php库。在项目目录中运行以下命令：

```bash
composer require fukuball/jieba-php
```

2. 创建一个名为`nlp.php`的文件，并在其中编写以下代码：

```php
<?php
require_once 'vendor/autoload.php';

use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;

// Initialise the jieba segmenter.
Jieba::init();
Finalseg::init();

// Keyword whose target article the generated links should point to.
$target_keyword = "名古屋旅行";

// Sample article text.
$article = "名古屋七天自由行是一次难忘的经历。我们参观了名古屋城和其他著名景点。";

// Segment the article into tokens.
$words = Jieba::cut($article);

// Build "noun phrases" in a simplified way: consecutive tokens made up
// entirely of characters in U+4E00..U+9FA5 are concatenated into one phrase,
// and any other token ends the phrase currently being built.
// NOTE(review): no part-of-speech check is done here, so a "phrase" is really
// just a run of Chinese-only tokens.
$noun_phrases = [];
$current_phrase = "";
foreach ($words as $word) {
    if (!preg_match("/^[\x{4e00}-\x{9fa5}]+$/u", $word)) {
        // Token is not purely Chinese: flush whatever has been collected.
        if (!empty($current_phrase)) {
            $noun_phrases[] = $current_phrase;
            $current_phrase = "";
        }
        continue;
    }

    $current_phrase .= $word;
}

// Flush the trailing phrase when the article ends on a Chinese token.
if (!empty($current_phrase)) {
    $noun_phrases[] = $current_phrase;
}

// Similarity measure: a simple character-level Jaccard index.
/**
 * Character-level Jaccard similarity of two UTF-8 strings.
 *
 * Each string is split into its distinct characters (the `u` flag makes the
 * split operate on code points rather than bytes) and the result is
 * |A ∩ B| / |A ∪ B|.
 *
 * @param string $str1 First string.
 * @param string $str2 Second string.
 * @return int|float Ratio between 0 and 1; 0 when neither string contains
 *                   any character, since the ratio is undefined there and
 *                   dividing would raise DivisionByZeroError on PHP 8.
 */
function jaccard_similarity($str1, $str2) {
    $set1 = array_unique(preg_split('//u', $str1, -1, PREG_SPLIT_NO_EMPTY));
    $set2 = array_unique(preg_split('//u', $str2, -1, PREG_SPLIT_NO_EMPTY));

    $intersection = count(array_intersect($set1, $set2));
    // Both inputs are already de-duplicated, so inclusion-exclusion gives
    // the union size without building a merged array.
    $union = count($set1) + count($set2) - $intersection;

    // Two empty character sets share nothing: report "no similarity"
    // instead of dividing by zero.
    if ($union === 0) {
        return 0;
    }

    return $intersection / $union;
}

// Score every extracted phrase against the target keyword. The scores are
// stored under the same keys as $noun_phrases so the two arrays line up.
$similarity_scores = [];
foreach ($noun_phrases as $i => $phrase) {
    $similarity_scores[$i] = jaccard_similarity($target_keyword, $phrase);
}

// Minimum similarity a phrase needs before it is linked.
$threshold = 0.4;

// Report each phrase that is similar enough to be linked.
foreach ($similarity_scores as $i => $score) {
    if ($score < $threshold) {
        continue;
    }

    // This is where the phrase would be wrapped in a link to the target article.
    echo "Add link to '{$noun_phrases[$i]}'\n";
}