mirror of
https://github.com/BookStackApp/BookStack.git
synced 2025-04-13 16:29:07 +00:00
Vectors: Built content vector indexing system
This commit is contained in:
parent
0ec0913846
commit
8452099a5b
9 changed files with 269 additions and 1 deletions
|
@ -22,6 +22,16 @@ return [
|
|||
// Callback URL for social authentication methods
|
||||
'callback_url' => env('APP_URL', false),
|
||||
|
||||
// LLM Service
|
||||
// Options: openai
|
||||
'llm' => env('LLM_SERVICE', ''),
|
||||
|
||||
// OpenAI API-compatible service details
|
||||
'openai' => [
|
||||
'endpoint' => env('OPENAI_ENDPOINT', 'https://api.openai.com'),
|
||||
'key' => env('OPENAI_KEY', ''),
|
||||
],
|
||||
|
||||
'github' => [
|
||||
'client_id' => env('GITHUB_APP_ID', false),
|
||||
'client_secret' => env('GITHUB_APP_SECRET', false),
|
||||
|
|
|
@ -6,6 +6,8 @@ use BookStack\Activity\Models\Tag;
|
|||
use BookStack\Entities\EntityProvider;
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use BookStack\Entities\Models\Page;
|
||||
use BookStack\Search\Vectors\StoreEntityVectorsJob;
|
||||
use BookStack\Search\Vectors\VectorQueryServiceProvider;
|
||||
use BookStack\Util\HtmlDocument;
|
||||
use DOMNode;
|
||||
use Illuminate\Database\Eloquent\Builder;
|
||||
|
@ -25,7 +27,7 @@ class SearchIndex
|
|||
public static string $softDelimiters = ".-";
|
||||
|
||||
public function __construct(
|
||||
protected EntityProvider $entityProvider
|
||||
protected EntityProvider $entityProvider,
|
||||
) {
|
||||
}
|
||||
|
||||
|
@ -37,6 +39,10 @@ class SearchIndex
|
|||
$this->deleteEntityTerms($entity);
|
||||
$terms = $this->entityToTermDataArray($entity);
|
||||
$this->insertTerms($terms);
|
||||
|
||||
if (VectorQueryServiceProvider::isEnabled()) {
|
||||
dispatch(new StoreEntityVectorsJob($entity));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -47,9 +53,15 @@ class SearchIndex
|
|||
public function indexEntities(array $entities): void
|
||||
{
|
||||
$terms = [];
|
||||
$vectorQueryEnabled = VectorQueryServiceProvider::isEnabled();
|
||||
|
||||
foreach ($entities as $entity) {
|
||||
$entityTerms = $this->entityToTermDataArray($entity);
|
||||
array_push($terms, ...$entityTerms);
|
||||
|
||||
if ($vectorQueryEnabled) {
|
||||
dispatch(new StoreEntityVectorsJob($entity));
|
||||
}
|
||||
}
|
||||
|
||||
$this->insertTerms($terms);
|
||||
|
|
84
app/Search/Vectors/EntityVectorGenerator.php
Normal file
84
app/Search/Vectors/EntityVectorGenerator.php
Normal file
|
@ -0,0 +1,84 @@
|
|||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors;
|
||||
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use BookStack\Search\Vectors\Services\VectorQueryService;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
|
||||
/**
 * Builds embedding vectors for an entity's text content and stores them
 * as SearchVector rows, replacing any rows previously stored for that entity.
 */
class EntityVectorGenerator
{
    /**
     * Maximum number of vector rows sent in a single INSERT statement.
     * Each row carries a full embedding literal, so one unbounded insert
     * for a long entity could produce an extremely large query.
     */
    protected const INSERT_CHUNK_SIZE = 50;

    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider
    ) {
    }

    /**
     * Generate embeddings for the given entity and store them,
     * replacing any embeddings already stored for that entity.
     *
     * All embeddings are generated before existing rows are touched, and the
     * delete + insert pair runs in one transaction, so a failure at any stage
     * leaves the previously stored vectors in place.
     */
    public function generateAndStore(Entity $entity): void
    {
        $vectorService = $this->vectorQueryServiceProvider->get();

        $text = $this->entityToPlainText($entity);
        $chunks = $this->chunkText($text);
        $embeddings = $this->chunksToEmbeddings($chunks, $vectorService);

        DB::transaction(function () use ($entity, $embeddings, $chunks): void {
            $this->deleteExistingEmbeddingsForEntity($entity);
            $this->storeEmbeddings($embeddings, $chunks, $entity);
        });
    }

    /**
     * Remove every stored vector row that belongs to the given entity.
     */
    protected function deleteExistingEmbeddingsForEntity(Entity $entity): void
    {
        SearchVector::query()
            ->where('entity_type', '=', $entity->getMorphClass())
            ->where('entity_id', '=', $entity->id)
            ->delete();
    }

    /**
     * Insert one row per embedding, pairing each embedding with the text
     * chunk found at the same array key.
     *
     * @param array<int, float[]> $embeddings
     * @param array<int, string>  $textChunks
     */
    protected function storeEmbeddings(array $embeddings, array $textChunks, Entity $entity): void
    {
        $toInsert = [];

        foreach ($embeddings as $index => $embedding) {
            $toInsert[] = [
                'entity_id'   => $entity->id,
                'entity_type' => $entity->getMorphClass(),
                // The literal is assembled from validated numbers only (see
                // embeddingToVectorLiteral), so it is safe to embed as raw SQL.
                'embedding'   => DB::raw('STRING_TO_VECTOR("' . $this->embeddingToVectorLiteral($embedding) . '")'),
                'text'        => $textChunks[$index],
            ];
        }

        // Insert in batches; an empty list results in no queries at all.
        foreach (array_chunk($toInsert, self::INSERT_CHUNK_SIZE) as $rows) {
            SearchVector::query()->insert($rows);
        }
    }

    /**
     * Convert an embedding to the "[v1,v2,...]" text form handed to STRING_TO_VECTOR.
     *
     * The values originate from an external service response and end up inside
     * a raw SQL expression, so anything other than a finite int/float is rejected.
     *
     * @param array<int, int|float> $embedding
     *
     * @throws \InvalidArgumentException If a value is not a finite number.
     */
    protected function embeddingToVectorLiteral(array $embedding): string
    {
        $values = [];

        foreach ($embedding as $value) {
            $isNumber = is_int($value) || is_float($value);
            if (!$isNumber || !is_finite((float) $value)) {
                throw new \InvalidArgumentException('Embedding values must be finite numbers');
            }

            $values[] = (string) $value;
        }

        return '[' . implode(',', $values) . ']';
    }

    /**
     * Request an embedding for each text chunk, keeping the chunk's array key.
     *
     * @param string[] $chunks
     *
     * @return array<int, float[]> One embedding per chunk, keyed like $chunks.
     */
    protected function chunksToEmbeddings(array $chunks, VectorQueryService $vectorQueryService): array
    {
        $embeddings = [];

        foreach ($chunks as $index => $chunk) {
            $embeddings[$index] = $vectorQueryService->generateEmbeddings($chunk);
        }

        return $embeddings;
    }

    /**
     * Split text into trimmed, non-empty lines.
     *
     * Only truly empty lines are discarded; a line such as "0" is kept
     * (a callback-less array_filter would treat it as falsy and drop it).
     * The result is re-indexed so it is a sequential list.
     *
     * @return string[]
     */
    protected function chunkText(string $text): array
    {
        // TODO - Join adjacent smaller chunks up
        $sections = array_map(
            static fn (string $section): string => trim($section),
            explode("\n", $text),
        );

        return array_values(array_filter(
            $sections,
            static fn (string $section): bool => $section !== '',
        ));
    }

    /**
     * Build the plain text to embed: the entity name, a blank line, then the
     * content of the attribute named by the entity's textField property.
     */
    protected function entityToPlainText(Entity $entity): string
    {
        $text = $entity->name . "\n\n" . $entity->{$entity->textField};
        // TODO - Add tags
        return $text;
    }
}
|
16
app/Search/Vectors/SearchVector.php
Normal file
16
app/Search/Vectors/SearchVector.php
Normal file
|
@ -0,0 +1,16 @@
|
|||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors;
|
||||
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
|
||||
/**
 * Eloquent model for a single stored embedding: one chunk of an entity's
 * text together with the vector generated for it.
 * Presumably backed by the "search_vectors" table via Eloquent's default
 * table naming - confirm against the migration.
 *
 * @property string $entity_type Morph class of the entity the chunk belongs to.
 * @property int    $entity_id   ID of the entity the chunk belongs to.
 * @property string $text        The text chunk the embedding was generated from.
 * @property string $embedding   The stored embedding value.
 */
class SearchVector extends Model
{
    // Rows carry no created_at/updated_at values, so timestamp handling is turned off.
    public $timestamps = false;
}
|
36
app/Search/Vectors/Services/OpenAiVectorQueryService.php
Normal file
36
app/Search/Vectors/Services/OpenAiVectorQueryService.php
Normal file
|
@ -0,0 +1,36 @@
|
|||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors\Services;
|
||||
|
||||
use BookStack\Http\HttpRequestService;
|
||||
|
||||
/**
 * VectorQueryService implementation which requests embeddings from an
 * OpenAI API-compatible HTTP endpoint.
 */
class OpenAiVectorQueryService implements VectorQueryService
{
    public function __construct(
        protected string $endpoint,
        protected string $key,
        protected HttpRequestService $http,
    ) {
    }

    /**
     * Send a JSON request to the configured endpoint and return the decoded body.
     *
     * @param string $method HTTP method to use.
     * @param string $uri    Path relative to the configured endpoint.
     * @param array  $data   Payload sent as the JSON request body.
     *
     * @return array The decoded JSON response.
     *
     * @throws \RuntimeException If the service answers with a non-2xx status,
     *                           or the body is not a JSON object/array.
     */
    protected function jsonRequest(string $method, string $uri, array $data): array
    {
        // Normalise slashes so exactly one separates the endpoint from the path.
        $fullUrl = rtrim($this->endpoint, '/') . '/' . ltrim($uri, '/');
        $client = $this->http->buildClient(10);
        $request = $this->http->jsonRequest($method, $fullUrl, $data)
            ->withHeader('Authorization', 'Bearer ' . $this->key);

        $response = $client->sendRequest($request);
        $status = $response->getStatusCode();
        $decoded = json_decode($response->getBody()->getContents(), true);

        if ($status < 200 || $status >= 300) {
            // Surface the service's own error message when one is provided.
            $detail = is_array($decoded) ? ($decoded['error']['message'] ?? '') : '';
            $suffix = (is_string($detail) && $detail !== '') ? ": {$detail}" : '';

            throw new \RuntimeException("LLM service request failed with status {$status}{$suffix}");
        }

        if (!is_array($decoded)) {
            throw new \RuntimeException('LLM service returned a response that could not be decoded as JSON');
        }

        return $decoded;
    }

    /**
     * Generate the embedding vector for the given text using the
     * "text-embedding-3-small" model.
     *
     * @return float[]
     *
     * @throws \RuntimeException If the request fails or the response holds no embedding.
     */
    public function generateEmbeddings(string $text): array
    {
        $response = $this->jsonRequest('POST', 'v1/embeddings', [
            'input' => $text,
            'model' => 'text-embedding-3-small',
        ]);

        $embedding = $response['data'][0]['embedding'] ?? null;
        if (!is_array($embedding)) {
            throw new \RuntimeException('LLM service response did not contain an embedding');
        }

        return $embedding;
    }
}
|
12
app/Search/Vectors/Services/VectorQueryService.php
Normal file
12
app/Search/Vectors/Services/VectorQueryService.php
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors\Services;
|
||||
|
||||
/**
 * Contract for services able to turn text into embedding vectors.
 */
interface VectorQueryService
{
    /**
     * Produce the embedding vector for a single chunk of text.
     *
     * @param string $text The text to generate an embedding for.
     *
     * @return float[] The embedding values for the given text.
     */
    public function generateEmbeddings(string $text): array;
}
|
28
app/Search/Vectors/StoreEntityVectorsJob.php
Normal file
28
app/Search/Vectors/StoreEntityVectorsJob.php
Normal file
|
@ -0,0 +1,28 @@
|
|||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors;
|
||||
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||
use Illuminate\Foundation\Queue\Queueable;
|
||||
|
||||
/**
 * Queued job which generates and stores the search vectors for one entity.
 */
class StoreEntityVectorsJob implements ShouldQueue
{
    use Queueable;

    /**
     * @param Entity $entity The entity whose vectors are generated when the job runs.
     */
    public function __construct(
        protected Entity $entity
    ) {
    }

    /**
     * Run the job by passing the held entity to the given vector generator.
     */
    public function handle(EntityVectorGenerator $generator): void
    {
        $generator->generateAndStore($this->entity);
    }
}
|
38
app/Search/Vectors/VectorQueryServiceProvider.php
Normal file
38
app/Search/Vectors/VectorQueryServiceProvider.php
Normal file
|
@ -0,0 +1,38 @@
|
|||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors;
|
||||
|
||||
use BookStack\Http\HttpRequestService;
|
||||
use BookStack\Search\Vectors\Services\OpenAiVectorQueryService;
|
||||
use BookStack\Search\Vectors\Services\VectorQueryService;
|
||||
|
||||
/**
 * Resolves the VectorQueryService implementation selected via the
 * "services.llm" configuration value.
 */
class VectorQueryServiceProvider
{
    public function __construct(
        protected HttpRequestService $http,
    ) {
    }

    /**
     * Build the vector query service for the configured LLM service name.
     *
     * @throws \Exception If no implementation exists for the configured name.
     */
    public function get(): VectorQueryService
    {
        $service = static::getServiceName();

        if ($service === 'openai') {
            // Cast so that null config values do not trigger a TypeError
            // on the service's string-typed constructor parameters.
            $key = (string) config('services.openai.key');
            $endpoint = (string) config('services.openai.endpoint');

            return new OpenAiVectorQueryService($endpoint, $key, $this->http);
        }

        throw new \Exception("No '{$service}' LLM service found");
    }

    /**
     * Get the configured LLM service name, trimmed and lower-cased.
     * A missing or non-string config value yields an empty string rather
     * than being handed to strtolower().
     */
    protected static function getServiceName(): string
    {
        $configured = config('services.llm');

        return is_string($configured) ? strtolower(trim($configured)) : '';
    }

    /**
     * Check whether an LLM service name has been configured.
     * This does not verify that an implementation exists for that name.
     */
    public static function isEnabled(): bool
    {
        return !empty(static::getServiceName());
    }
}
|
|
@ -0,0 +1,32 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Create the "search_vectors" table: one row per embedded text chunk,
     * referencing its entity by type and ID.
     */
    public function up(): void
    {
        // TODO - Handle compatibility with older databases that don't support vectors
        Schema::create('search_vectors', static function (Blueprint $table): void {
            $table->string('entity_type', 100);
            $table->integer('entity_id');
            $table->text('text');
            $table->vector('embedding');

            // Rows are looked up and deleted by their owning entity.
            $table->index(['entity_type', 'entity_id']);
        });
    }

    /**
     * Drop the "search_vectors" table if it exists.
     */
    public function down(): void
    {
        Schema::dropIfExists('search_vectors');
    }
};
|
Loading…
Add table
Reference in a new issue