diff --git a/app/Config/services.php b/app/Config/services.php
index d73458231..a34b243f0 100644
--- a/app/Config/services.php
+++ b/app/Config/services.php
@@ -22,6 +22,16 @@ return [
     // Callback URL for social authentication methods
     'callback_url' => env('APP_URL', false),
 
+    // LLM Service
+    // Options: openai
+    'llm' => env('LLM_SERVICE', ''),
+
+    // OpenAI API-compatible service details
+    'openai' => [
+        'endpoint' => env('OPENAI_ENDPOINT', 'https://api.openai.com'),
+        'key' => env('OPENAI_KEY', ''),
+    ],
+
     'github' => [
         'client_id' => env('GITHUB_APP_ID', false),
         'client_secret' => env('GITHUB_APP_SECRET', false),
diff --git a/app/Search/SearchIndex.php b/app/Search/SearchIndex.php
index 36f71f6cc..9b34fa04e 100644
--- a/app/Search/SearchIndex.php
+++ b/app/Search/SearchIndex.php
@@ -6,6 +6,8 @@ use BookStack\Activity\Models\Tag;
 use BookStack\Entities\EntityProvider;
 use BookStack\Entities\Models\Entity;
 use BookStack\Entities\Models\Page;
+use BookStack\Search\Vectors\StoreEntityVectorsJob;
+use BookStack\Search\Vectors\VectorQueryServiceProvider;
 use BookStack\Util\HtmlDocument;
 use DOMNode;
 use Illuminate\Database\Eloquent\Builder;
@@ -25,7 +27,7 @@ class SearchIndex
     public static string $softDelimiters = ".-";
 
     public function __construct(
-        protected EntityProvider $entityProvider
+        protected EntityProvider $entityProvider,
     ) {
     }
 
@@ -37,6 +39,10 @@ class SearchIndex
         $this->deleteEntityTerms($entity);
         $terms = $this->entityToTermDataArray($entity);
         $this->insertTerms($terms);
+
+        if (VectorQueryServiceProvider::isEnabled()) {
+            dispatch(new StoreEntityVectorsJob($entity));
+        }
     }
 
     /**
@@ -47,9 +53,15 @@ class SearchIndex
     public function indexEntities(array $entities): void
     {
         $terms = [];
+        $vectorQueryEnabled = VectorQueryServiceProvider::isEnabled();
+
         foreach ($entities as $entity) {
             $entityTerms = $this->entityToTermDataArray($entity);
             array_push($terms, ...$entityTerms);
+
+            if ($vectorQueryEnabled) {
+                dispatch(new StoreEntityVectorsJob($entity));
+            }
         }
 
         $this->insertTerms($terms);
diff --git a/app/Search/Vectors/EntityVectorGenerator.php b/app/Search/Vectors/EntityVectorGenerator.php
new file mode 100644
--- /dev/null
+++ b/app/Search/Vectors/EntityVectorGenerator.php
@@ -0,0 +1,119 @@
+<?php
+
+namespace BookStack\Search\Vectors;
+
+use BookStack\Entities\Models\Entity;
+use BookStack\Search\Vectors\Services\VectorQueryService;
+use Illuminate\Support\Facades\DB;
+
+/**
+ * Generates embedding vectors for the text of an entity
+ * and stores them as search vector rows.
+ */
+class EntityVectorGenerator
+{
+    public function __construct(
+        protected VectorQueryServiceProvider $vectorQueryServiceProvider
+    ) {
+    }
+
+    /**
+     * Generate embeddings for the given entity, replacing any
+     * embeddings which were previously stored for it.
+     */
+    public function generateAndStore(Entity $entity): void
+    {
+        $vectorService = $this->vectorQueryServiceProvider->get();
+
+        $text = $this->entityToPlainText($entity);
+        $chunks = $this->chunkText($text);
+        $embeddings = $this->chunksToEmbeddings($chunks, $vectorService);
+
+        // Replace the old rows within a transaction so that a failed insert
+        // does not leave the entity without any stored vectors.
+        DB::transaction(function () use ($entity, $embeddings, $chunks) {
+            $this->deleteExistingEmbeddingsForEntity($entity);
+            $this->storeEmbeddings($embeddings, $chunks, $entity);
+        });
+    }
+
+    /**
+     * Delete all stored search vectors belonging to the given entity.
+     */
+    protected function deleteExistingEmbeddingsForEntity(Entity $entity): void
+    {
+        SearchVector::query()
+            ->where('entity_type', '=', $entity->getMorphClass())
+            ->where('entity_id', '=', $entity->id)
+            ->delete();
+    }
+
+    /**
+     * Store the given embeddings against the entity, each alongside
+     * the text chunk found at the same key in $textChunks.
+     *
+     * @param array<int, float[]> $embeddings
+     * @param array<int, string> $textChunks
+     */
+    protected function storeEmbeddings(array $embeddings, array $textChunks, Entity $entity): void
+    {
+        $toInsert = [];
+
+        foreach ($embeddings as $index => $embedding) {
+            // The values originate from an external service response and end up in
+            // a raw SQL expression, so force each one to a float before joining.
+            $values = array_map(fn ($value): float => (float) $value, $embedding);
+            $vector = '[' . implode(',', $values) . ']';
+
+            $toInsert[] = [
+                'entity_id' => $entity->id,
+                'entity_type' => $entity->getMorphClass(),
+                'embedding' => DB::raw("STRING_TO_VECTOR('{$vector}')"),
+                'text' => $textChunks[$index],
+            ];
+        }
+
+        // TODO - Chunk inserts
+        SearchVector::query()->insert($toInsert);
+    }
+
+    /**
+     * Generate an embedding for each of the given chunks, keeping the chunk keys.
+     *
+     * @param string[] $chunks
+     * @return array<int, float[]>
+     */
+    protected function chunksToEmbeddings(array $chunks, VectorQueryService $vectorQueryService): array
+    {
+        $embeddings = [];
+        foreach ($chunks as $index => $chunk) {
+            $embeddings[$index] = $vectorQueryService->generateEmbeddings($chunk);
+        }
+        return $embeddings;
+    }
+
+    /**
+     * Split the text into trimmed, non-empty chunks, one per line of text.
+     *
+     * @return string[]
+     */
+    protected function chunkText(string $text): array
+    {
+        // TODO - Join adjacent smaller chunks up
+        $sections = array_map(fn (string $section): string => trim($section), explode("\n", $text));
+
+        // Compare against the empty string explicitly since a bare array_filter()
+        // would also drop a valid chunk consisting of just "0".
+        return array_values(array_filter($sections, fn (string $section): bool => $section !== ''));
+    }
+
+    /**
+     * Build the plain text to embed: the entity name followed by its text field content.
+     */
+    protected function entityToPlainText(Entity $entity): string
+    {
+        $text = $entity->name . "\n\n" . $entity->{$entity->textField};
+        // TODO - Add tags
+        return $text;
+    }
+}
diff --git a/app/Search/Vectors/SearchVector.php b/app/Search/Vectors/SearchVector.php
new file mode 100644
index 000000000..4a5555f87
--- /dev/null
+++ b/app/Search/Vectors/SearchVector.php
@@ -0,0 +1,16 @@
+<?php
+
+namespace BookStack\Search\Vectors;
+
+use Illuminate\Database\Eloquent\Model;
+
+/**
+ * @property string $entity_type
+ * @property int $entity_id
+ * @property string $text
+ * @property string $embedding
+ */
+class SearchVector extends Model
+{
+    public $timestamps = false;
+}
diff --git a/app/Search/Vectors/Services/OpenAiVectorQueryService.php b/app/Search/Vectors/Services/OpenAiVectorQueryService.php
new file mode 100644
--- /dev/null
+++ b/app/Search/Vectors/Services/OpenAiVectorQueryService.php
@@ -0,0 +1,69 @@
+<?php
+
+namespace BookStack\Search\Vectors\Services;
+
+use BookStack\Http\HttpRequestService;
+
+/**
+ * Vector query service which talks to an OpenAI API-compatible HTTP endpoint.
+ */
+class OpenAiVectorQueryService implements VectorQueryService
+{
+    public function __construct(
+        protected string $endpoint,
+        protected string $key,
+        protected HttpRequestService $http,
+    ) {
+    }
+
+    /**
+     * Send a JSON request to the configured endpoint and return the decoded response body.
+     *
+     * @throws \Exception If the response has a non-2xx status or its body does not decode to an array.
+     */
+    protected function jsonRequest(string $method, string $uri, array $data): array
+    {
+        $fullUrl = rtrim($this->endpoint, '/') . '/' . ltrim($uri, '/');
+        $client = $this->http->buildClient(10);
+        $request = $this->http->jsonRequest($method, $fullUrl, $data)
+            ->withHeader('Authorization', 'Bearer ' . $this->key);
+
+        $response = $client->sendRequest($request);
+
+        // sendRequest() may hand back 4xx/5xx responses rather than throwing (as PSR-18
+        // clients do), so check the status instead of reading embeddings from an error body.
+        $status = $response->getStatusCode();
+        if ($status < 200 || $status >= 300) {
+            throw new \Exception("OpenAI request to '{$uri}' failed with status code {$status}");
+        }
+
+        $decoded = json_decode($response->getBody()->getContents(), true);
+        if (!is_array($decoded)) {
+            throw new \Exception("OpenAI request to '{$uri}' returned a response that was not valid JSON");
+        }
+
+        return $decoded;
+    }
+
+    /**
+     * Generate an embedding vector for the given text using the 'text-embedding-3-small' model.
+     *
+     * @return float[]
+     *
+     * @throws \Exception If the request fails or the response contains no embedding.
+     */
+    public function generateEmbeddings(string $text): array
+    {
+        $response = $this->jsonRequest('POST', 'v1/embeddings', [
+            'input' => $text,
+            'model' => 'text-embedding-3-small',
+        ]);
+
+        $embedding = $response['data'][0]['embedding'] ?? null;
+        if (!is_array($embedding)) {
+            throw new \Exception('OpenAI embeddings response did not contain an embedding');
+        }
+
+        return $embedding;
+    }
+}
diff --git a/app/Search/Vectors/Services/VectorQueryService.php b/app/Search/Vectors/Services/VectorQueryService.php
new file mode 100644
index 000000000..2cc4ed017
--- /dev/null
+++ b/app/Search/Vectors/Services/VectorQueryService.php
@@ -0,0 +1,12 @@
+<?php
+
+namespace BookStack\Search\Vectors\Services;
+
+interface VectorQueryService
+{
+    /**
+     * Generate embedding vectors from the given chunk of text.
+     * @return float[]
+     */
+    public function generateEmbeddings(string $text): array;
+}
diff --git a/app/Search/Vectors/StoreEntityVectorsJob.php b/app/Search/Vectors/StoreEntityVectorsJob.php
new file mode 100644
index 000000000..e96fc345a
--- /dev/null
+++ b/app/Search/Vectors/StoreEntityVectorsJob.php
@@ -0,0 +1,28 @@
+<?php
+
+namespace BookStack\Search\Vectors;
+
+use BookStack\Entities\Models\Entity;
+use Illuminate\Contracts\Queue\ShouldQueue;
+use Illuminate\Foundation\Queue\Queueable;
+
+class StoreEntityVectorsJob implements ShouldQueue
+{
+    use Queueable;
+
+    /**
+     * Create a new job instance.
+     */
+    public function __construct(
+        protected Entity $entity
+    ) {
+    }
+
+    /**
+     * Execute the job.
+     */
+    public function handle(EntityVectorGenerator $generator): void
+    {
+        $generator->generateAndStore($this->entity);
+    }
+}
diff --git a/app/Search/Vectors/VectorQueryServiceProvider.php b/app/Search/Vectors/VectorQueryServiceProvider.php
new file mode 100644
--- /dev/null
+++ b/app/Search/Vectors/VectorQueryServiceProvider.php
@@ -0,0 +1,53 @@
+<?php
+
+namespace BookStack\Search\Vectors;
+
+use BookStack\Http\HttpRequestService;
+use BookStack\Search\Vectors\Services\OpenAiVectorQueryService;
+use BookStack\Search\Vectors\Services\VectorQueryService;
+
+/**
+ * Resolves the vector query service selected via the 'services.llm' config option.
+ */
+class VectorQueryServiceProvider
+{
+    public function __construct(
+        protected HttpRequestService $http,
+    ) {
+    }
+
+    /**
+     * Build the vector query service for the configured LLM service name.
+     *
+     * @throws \Exception If the configured name does not match a supported service.
+     */
+    public function get(): VectorQueryService
+    {
+        $service = static::getServiceName();
+
+        if ($service === 'openai') {
+            $key = config('services.openai.key');
+            $endpoint = config('services.openai.endpoint');
+            return new OpenAiVectorQueryService($endpoint, $key, $this->http);
+        }
+
+        throw new \Exception("No '{$service}' LLM service found");
+    }
+
+    /**
+     * Get the configured LLM service name, trimmed and lower-cased.
+     * An unset (null) config value is treated as an empty name.
+     */
+    protected static function getServiceName(): string
+    {
+        return strtolower(trim((string) config('services.llm')));
+    }
+
+    /**
+     * Check whether an LLM service name has been configured.
+     */
+    public static function isEnabled(): bool
+    {
+        return !empty(static::getServiceName());
+    }
+}
diff --git a/database/migrations/2025_03_24_155748_create_search_vectors_table.php b/database/migrations/2025_03_24_155748_create_search_vectors_table.php
new file mode 100644
index 000000000..d7fb0118a
--- /dev/null
+++ b/database/migrations/2025_03_24_155748_create_search_vectors_table.php
@@ -0,0 +1,32 @@
+<?php
+
+use Illuminate\Database\Migrations\Migration;
+use Illuminate\Database\Schema\Blueprint;
+use Illuminate\Support\Facades\Schema;
+
+return new class extends Migration
+{
+    /**
+     * Run the migrations.
+     */
+    public function up(): void
+    {
+        // TODO - Handle compatibility with older databases that don't support vectors
+        Schema::create('search_vectors', function (Blueprint $table) {
+            $table->string('entity_type', 100);
+            $table->integer('entity_id');
+            $table->text('text');
+            $table->vector('embedding');
+
+            $table->index(['entity_type', 'entity_id']);
+        });
+    }
+
+    /**
+     * Reverse the migrations.
+     */
+    public function down(): void
+    {
+        Schema::dropIfExists('search_vectors');
+    }
+};