Commit 2e49b555 authored by echicken

Initial commit

.gitignore
.env
node_modules
build
data
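The data directory is ignored because both containers persist runtime state under it (see the volume mounts in docker-compose.yml below), build is the tsc output directory, and .env keeps local configuration out of the repository.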

Dockerfile

FROM --platform=linux/amd64 node:20
# We don't need the standalone Chromium
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
# Install Google Chrome Stable and fonts
# Note: this installs the necessary libs to make the browser work with Puppeteer.
RUN apt-get update && apt-get install curl gnupg -y \
&& curl --location --silent https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
&& sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \
&& apt-get update \
&& apt-get install google-chrome-stable -y --no-install-recommends \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY . /app/
RUN npm install
RUN npm run build
CMD ["npm", "start"]
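Skipping the bundled Chromium download is safe here because wiki.ts (below) launches Puppeteer with executablePath pointed at the /usr/bin/google-chrome that this image installs.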

docker-compose.yml

services:
qdrant:
# image: qdrant/qdrant
build:
context: .
dockerfile_inline: |
FROM qdrant/qdrant:latest
RUN apt-get update -yq && apt-get install -yqq curl
ports:
- 6333:6333
volumes:
- ./data/qdrant/storage:/qdrant/storage
- ./data/qdrant/snapshots:/qdrant/snapshots
- ./conf/qdrant/config.yaml:/qdrant/config/config.yaml
healthcheck:
test: curl -s http://localhost:6333/healthz | grep -q 'healthz check passed' || exit 1
interval: 1m
timeout: 10s
retries: 3
start_period: 10s
start_interval: 5s
binary-bob:
build: .
depends_on:
qdrant:
condition: service_healthy
volumes:
- ./data/binary-bob:/app/data
ports:
- 3000:3000
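With both services defined, docker compose up --build should bring up Qdrant first and, once its curl-based health check passes, the binary-bob app on port 3000. The inline Dockerfile for the qdrant service exists only to add curl, which the health check needs and the stock image does not appear to ship.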

conf/qdrant/config.yaml

log_level: INFO
storage:
# Where to store all the data
storage_path: ./storage
# Where to store snapshots
snapshots_path: ./snapshots
snapshots_config:
# "local" or "s3" - where to store snapshots
snapshots_storage: local
# s3_config:
# bucket: ""
# region: ""
# access_key: ""
# secret_key: ""
# Where to store temporary files
  # If null, temporary snapshots are stored in: storage/snapshots_temp/
temp_path: null
# If true - point's payload will not be stored in memory.
# It will be read from the disk every time it is requested.
# This setting saves RAM by (slightly) increasing the response time.
  # Note: payload values that are involved in filtering and are indexed remain in RAM.
on_disk_payload: true
# Maximum number of concurrent updates to shard replicas
# If `null` - maximum concurrency is used.
update_concurrency: null
# Write-ahead-log related configuration
wal:
# Size of a single WAL segment
wal_capacity_mb: 32
# Number of WAL segments to create ahead of actual data requirement
wal_segments_ahead: 0
# Normal node - receives all updates and answers all queries
node_type: "Normal"
# Listener node - receives all updates, but does not answer search/read queries
# Useful for setting up a dedicated backup node
# node_type: "Listener"
performance:
# Number of parallel threads used for search operations. If 0 - auto selection.
max_search_threads: 0
    # Max number of threads (jobs) for running optimizations across all collections; each thread runs one job.
    # If 0 - no limit, choose dynamically to saturate CPU.
    # Note: each optimization job will also use `max_indexing_threads` threads by itself for index building.
max_optimization_threads: 0
# CPU budget, how many CPUs (threads) to allocate for an optimization job.
# If 0 - auto selection, keep 1 or more CPUs unallocated depending on CPU size
# If negative - subtract this number of CPUs from the available CPUs.
# If positive - use this exact number of CPUs.
optimizer_cpu_budget: 0
# Prevent DDoS of too many concurrent updates in distributed mode.
# One external update usually triggers multiple internal updates, which breaks internal
# timings. For example, the health check timing and consensus timing.
# If null - auto selection.
update_rate_limit: null
# Limit for number of incoming automatic shard transfers per collection on this node, does not affect user-requested transfers.
# The same value should be used on all nodes in a cluster.
# Default is to allow 1 transfer.
# If null - allow unlimited transfers.
#incoming_shard_transfers_limit: 1
# Limit for number of outgoing automatic shard transfers per collection on this node, does not affect user-requested transfers.
# The same value should be used on all nodes in a cluster.
# Default is to allow 1 transfer.
# If null - allow unlimited transfers.
#outgoing_shard_transfers_limit: 1
optimizers:
# The minimal fraction of deleted vectors in a segment, required to perform segment optimization
deleted_threshold: 0.2
# The minimal number of vectors in a segment, required to perform segment optimization
vacuum_min_vector_number: 1000
    # Target number of segments the optimizer will try to keep.
    # The real number of segments may vary depending on multiple parameters:
# - Amount of stored points
# - Current write RPS
#
    # It is recommended to select the default number of segments as a factor of the number of search threads,
    # so that each segment is handled evenly by one of the threads.
    # If `default_segment_number = 0`, it will be selected automatically based on the number of available CPUs.
default_segment_number: 0
    # Do not create segments larger than this size (in KiloBytes).
# Large segments might require disproportionately long indexation times,
# therefore it makes sense to limit the size of segments.
#
    # If indexing speed is a higher priority for you, make this parameter lower.
    # If search speed is more important, make this parameter higher.
# Note: 1Kb = 1 vector of size 256
# If not set, will be automatically selected considering the number of available CPUs.
max_segment_size_kb: null
# Maximum size (in KiloBytes) of vectors to store in-memory per segment.
    # Segments larger than this threshold will be stored as a read-only memmapped file.
# To enable memmap storage, lower the threshold
# Note: 1Kb = 1 vector of size 256
# To explicitly disable mmap optimization, set to `0`.
# If not set, will be disabled by default.
memmap_threshold_kb: null
# Maximum size (in KiloBytes) of vectors allowed for plain index.
# Default value based on https://github.com/google-research/google-research/blob/master/scann/docs/algorithms.md
# Note: 1Kb = 1 vector of size 256
# To explicitly disable vector indexing, set to `0`.
# If not set, the default value will be used.
indexing_threshold_kb: 20000
# Interval between forced flushes.
flush_interval_sec: 5
# Max number of threads (jobs) for running optimizations per shard.
# Note: each optimization job will also use `max_indexing_threads` threads by itself for index building.
    # If null - no limit, choose dynamically to saturate CPU.
# If 0 - no optimization threads, optimizations will be disabled.
max_optimization_threads: null
  # This section has the same options as 'optimizers' above. All values specified here will overwrite the collections'
  # optimizer configs, regardless of the config above and the options specified at collection creation.
#optimizers_overwrite:
# deleted_threshold: 0.2
# vacuum_min_vector_number: 1000
# default_segment_number: 0
# max_segment_size_kb: null
# memmap_threshold_kb: null
# indexing_threshold_kb: 20000
# flush_interval_sec: 5
# max_optimization_threads: null
# Default parameters of HNSW Index. Could be overridden for each collection or named vector individually
hnsw_index:
    # Number of edges per node in the index graph. The larger the value, the more accurate the search, but the more space required.
m: 16
    # Number of neighbours to consider during index building. The larger the value, the more accurate the search, but the more time it takes to build the index.
ef_construct: 100
# Minimal size (in KiloBytes) of vectors for additional payload-based indexing.
# If payload chunk is smaller than `full_scan_threshold_kb` additional indexing won't be used -
# in this case full-scan search should be preferred by query planner and additional indexing is not required.
# Note: 1Kb = 1 vector of size 256
full_scan_threshold_kb: 10000
# Number of parallel threads used for background index building.
# If 0 - automatically select.
    # Best kept between 8 and 16 to reduce the likelihood of building broken/inefficient HNSW graphs.
    # On small CPUs, fewer threads are used.
max_indexing_threads: 0
# Store HNSW index on disk. If set to false, index will be stored in RAM. Default: false
on_disk: false
# Custom M param for hnsw graph built for payload index. If not set, default M will be used.
payload_m: null
# Default shard transfer method to use if none is defined.
# If null - don't have a shard transfer preference, choose automatically.
# If stream_records, snapshot or wal_delta - prefer this specific method.
# More info: https://qdrant.tech/documentation/guides/distributed_deployment/#shard-transfer-method
shard_transfer_method: null
# Default parameters for collections
collection:
    # Number of replicas of each shard that the network tries to maintain
replication_factor: 1
# How many replicas should apply the operation for us to consider it successful
write_consistency_factor: 1
# Default parameters for vectors.
vectors:
# Whether vectors should be stored in memory or on disk.
on_disk: true
# shard_number_per_node: 1
# Default quantization configuration.
# More info: https://qdrant.tech/documentation/guides/quantization
quantization: null
service:
# Maximum size of POST data in a single request in megabytes
max_request_size_mb: 32
# Number of parallel workers used for serving the api. If 0 - equal to the number of available cores.
# If missing - Same as storage.max_search_threads
max_workers: 0
# Host to bind the service on
host: 0.0.0.0
# HTTP(S) port to bind the service on
http_port: 6333
# gRPC port to bind the service on.
# If `null` - gRPC is disabled. Default: null
  # gRPC is commented out here, so it is disabled; uncomment to enable it:
# grpc_port: 6334
# Enable CORS headers in REST API.
# If enabled, browsers would be allowed to query REST endpoints regardless of query origin.
# More info: https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS
# Default: true
enable_cors: true
# Enable HTTPS for the REST and gRPC API
enable_tls: false
# Check user HTTPS client certificate against CA file specified in tls config
verify_https_client_certificate: false
# Set an api-key.
# If set, all requests must include a header with the api-key.
# example header: `api-key: <API-KEY>`
#
# If you enable this you should also enable TLS.
# (Either above or via an external service like nginx.)
# Sending an api-key over an unencrypted channel is insecure.
#
# Uncomment to enable.
# api_key: your_secret_api_key_here
# Set an api-key for read-only operations.
# If set, all requests must include a header with the api-key.
# example header: `api-key: <API-KEY>`
#
# If you enable this you should also enable TLS.
# (Either above or via an external service like nginx.)
# Sending an api-key over an unencrypted channel is insecure.
#
# Uncomment to enable.
# read_only_api_key: your_secret_read_only_api_key_here
# Uncomment to enable JWT Role Based Access Control (RBAC).
# If enabled, you can generate JWT tokens with fine-grained rules for access control.
# Use generated token instead of API key.
#
# jwt_rbac: true
cluster:
# Use `enabled: true` to run Qdrant in distributed deployment mode
enabled: false
# Configuration of the inter-cluster communication
p2p:
# Port for internal communication between peers
port: 6335
# Use TLS for communication between peers
enable_tls: false
# Configuration related to distributed consensus algorithm
consensus:
# How frequently peers should ping each other.
    # Setting this parameter to a lower value will allow consensus
    # to detect disconnected nodes earlier, but a too-frequent
    # tick period may create significant network and CPU overhead.
# We encourage you NOT to change this parameter unless you know what you are doing.
tick_period_ms: 100
# Set to true to prevent service from sending usage statistics to the developers.
# Read more: https://qdrant.tech/documentation/guides/telemetry
telemetry_disabled: false
# TLS configuration.
# Required if either service.enable_tls or cluster.p2p.enable_tls is true.
# tls:
# # Server certificate chain file
# cert: ./tls/cert.pem
# # Server private key file
# key: ./tls/key.pem
# # Certificate authority certificate file.
# # This certificate will be used to validate the certificates
# # presented by other nodes during inter-cluster communication.
# #
# # If verify_https_client_certificate is true, it will verify
# # HTTPS client certificate
# #
# # Required if cluster.p2p.enable_tls is true.
# # ca_cert: ./tls/cacert.pem
# # TTL in seconds to reload certificate from disk, useful for certificate rotations.
# # Only works for HTTPS endpoints. Does not support gRPC (and intra-cluster communication).
# # If `null` - TTL is disabled.
# cert_ttl: 3600

package.json

{
"name": "binary-bob",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"build": "npx tsc",
"start": "node build/index.js",
"dev": "npx tsc && node build/index.js"
},
"keywords": [],
"author": "echicken",
"license": "MIT",
"devDependencies": {
"@swc/cli": "^0.4.0",
"@swc/core": "^1.6.13",
"@types/express": "^4.17.21",
"@types/node": "^20.14.10",
"typescript": "^5.5.3"
},
"dependencies": {
"body-parser": "^1.20.2",
"dotenv": "^16.4.5",
"express": "^4.19.2",
"llamaindex": "^0.5.3",
"puppeteer": "^22.13.0"
},
"type": "module"
}

src/index.ts

import express, { Express, Request, Response } from 'express';
import bodyParser from 'body-parser';
import { init, queryAgent } from './lib/ai.js';
import { embedData } from './lib/wiki.js';
import config from './lib/config.js';
const app: Express = express();
app.use(bodyParser.urlencoded({ extended: false }));
app.use(bodyParser.json());
app.get('/embed', async (req: Request, res: Response) => {
await embedData();
res.json({ status: 'ok' });
});
app.get('/query/:query', async (req: Request, res: Response) => {
const answer = await queryAgent({ query: req.params.query });
if (answer === undefined) {
res.sendStatus(404);
} else {
res.json(answer);
}
});
app.get('/', (req: Request, res: Response) => {
res.sendStatus(404);
});
(async () => {
await init();
  app.listen(config.httpPort, () => {
    console.log(`binary-bob is listening at http://localhost:${config.httpPort}`);
});
})();
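Once the server is listening, GET /embed (e.g. curl http://localhost:3000/embed) kicks off a full scrape-and-ingest of the wiki, and GET /query/:query (e.g. curl http://localhost:3000/query/How%20do%20I%20install%20Synchronet, an illustrative question) returns a JSON answer with sources, or a 404 when no usable answer was produced.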

src/lib/ai.ts

import type { EvaluationResult } from "llamaindex/evaluation/types";
import type { BaseToolWithCall, EngineResponse, Metadata, NodeWithScore, RelatedNodeInfo, TextNode } from 'llamaindex';
import { CallbackManager, CompactAndRefine, Document, IngestionPipeline, MetadataMode, OpenAI, OpenAIAgent, OpenAIEmbedding, QdrantVectorStore, QueryEngine, QueryEngineTool, RelevancyEvaluator, ResponseSynthesizer, RetrieverQueryEngine, Settings, SimpleNodeParser, TextQaPrompt, VectorStoreIndex } from 'llamaindex';
import config from './config.js';
// configure LLM
Settings.llm = new OpenAI({ model: config.llm }) as any;
// configure embedding model
Settings.embedModel = new OpenAIEmbedding({
model: config.embeddingModel,
dimensions: config.embeddingDimension,
});
const vectorStore = new QdrantVectorStore({
collectionName: config.qdrantCollection,
url: config.qdrantUrl,
embedModel: Settings.embedModel,
});
interface Source {
title: string,
url: string,
}
interface Answer {
answer: string,
sources: Source[],
relevance: {
passing: boolean,
score: number,
},
};
const newTextQaPrompt: TextQaPrompt = ({ context, query }): string => {
return `Context:\r\n${context}\r\n\r\n---\r\n\r\nQuestion:\r\n${query}\r\n\r\n---\r\n\r\nResponse:\r\n`;
}
export async function init(): Promise<void> {
await vectorStore.initializeCollection(config.embeddingDimension);
}
async function getQueryEngine(): Promise<QueryEngine & RetrieverQueryEngine> {
const responseSynthesizer = new ResponseSynthesizer({ responseBuilder: new CompactAndRefine(undefined, newTextQaPrompt)});
const index = await VectorStoreIndex.fromVectorStore(vectorStore);
const retriever = index.asRetriever({ similarityTopK: 3 });
const queryEngine = index.asQueryEngine({ responseSynthesizer, retriever });
return queryEngine;
}
async function getQueryEngineTools(): Promise<BaseToolWithCall[]> {
const queryEngine = await getQueryEngine();
const queryEngineTool = new QueryEngineTool({
queryEngine,
metadata: {
name: 'synchronet_query_engine',
description: 'Use this engine to answer questions about installing, configuring, customizing, operating, troubleshooting, and using Synchronet BBS.',
},
});
return [queryEngineTool];
}
async function getAgent(): Promise<OpenAIAgent> {
const tools = await getQueryEngineTools();
const agent = new OpenAIAgent({
llm: Settings.llm,
tools,
verbose: true,
systemPrompt: (
'You are a polite, friendly, helpful technical support assistant for system operators of Synchronet BBS.\r\n'
+ 'Your job is to answer questions about installing, configuring, customizing, operating, troubleshooting, and using Synchronet BBS.\r\n'
+ 'You will be provided with relevant context information to help you find an answer.\r\n'
+ 'DO NOT include the context information in your response. Your job is to paraphrase and summarize this data.\r\n'
+ 'DO NOT repeat the question in your response.\r\n'
+ 'You MUST answer the question using only the provided context information and NOT any prior knowledge.\r\n'
+ 'If no answer can be found in the context information, you MUST respond with the phrase "Answer unavailable" and nothing else.\r\n'
+ 'Provide step-by-step instructions when possible and be detailed in your responses.\r\n'
),
});
return agent;
}
async function evaluateResponse(query: string, response: EngineResponse, nodes: NodeWithScore<Metadata>[]): Promise<EvaluationResult | undefined> {
if (typeof response.message.content !== 'string') return;
const evaluator = new RelevancyEvaluator();
const contexts: string[] = [];
for (const node of nodes) {
if (typeof node.node.getContent === 'function') {
contexts.push(node.node.getContent(MetadataMode.ALL));
} else {
const tn = node.node as TextNode;
if (typeof tn.text === 'string') contexts.push(tn.text);
}
}
const relevance: EvaluationResult = await evaluator.evaluate({
query,
response: response.message.content,
contexts
});
return relevance;
}
async function getAnswer(query: string, response: EngineResponse, nodes: NodeWithScore<Metadata>[]): Promise<Answer | undefined> {
if (typeof response.message.content !== 'string') return;
const relevance = await evaluateResponse(query, response, nodes);
if (relevance === undefined) return;
const answer: Answer = {
answer: response.message.content,
sources: [],
relevance: {
passing: relevance.passing,
score: relevance.score,
},
}
if (!Array.isArray(nodes) || nodes.length < 1) return answer;
for (const node of nodes) {
    if (node.score === undefined || node.score < 0.5) continue;
if (node.node.relationships.SOURCE === undefined) continue;
const source = node.node.relationships.SOURCE as RelatedNodeInfo;
if (source.nodeId === undefined) continue;
if (answer.sources.some(e => e.url === source.nodeId)) continue;
answer.sources.push({ url: source.nodeId, title: source.metadata.title });
}
return answer;
}
export async function query({ query }: { query: string }): Promise<Answer | undefined> {
const queryEngine = await getQueryEngine();
const response = await queryEngine.query({ query });
if (typeof response.message.content !== 'string') return;
if (!Array.isArray(response.sourceNodes) || response.sourceNodes.length < 1) return;
const answer = await getAnswer(query, response, response.sourceNodes);
return answer;
}
export async function queryAgent({ query }: { query: string }): Promise<Answer | undefined> {
console.debug(`Querying agent with: ${query}`);
const agent = await getAgent();
const callbackManager = new CallbackManager(); // https://github.com/run-llama/LlamaIndexTS/issues/1015
const sourceNodes = new Promise<NodeWithScore<Metadata>[]>((res) => { // Let's just get out of callback-land as quickly as possible eh?
callbackManager.on('retrieve-end', (data) => {
res(data.detail?.nodes ?? []);
});
});
const response = await Settings.withCallbackManager(callbackManager, () => {
return agent.chat({ message: query });
});
  // If the response came from a function tool, 'retrieve-end' will never have fired, so force it here;
  // otherwise this dispatch is a no-op, since the promise above has already resolved.
  callbackManager.dispatchEvent('retrieve-end', { query, nodes: [] });
console.debug(`Agent responded with: ${response}`, typeof response, JSON.stringify(response));
if (typeof response.message.content !== 'string') return;
const nodes = await sourceNodes;
const answer = await getAnswer(query, response, nodes);
return answer;
}
export async function resetCollection(): Promise<void> {
const client = vectorStore.client();
await client.deleteCollection(config.qdrantCollection);
await vectorStore.initializeCollection(config.embeddingDimension);
}
export async function ingestText(text: string, id: string): Promise<void> {
const document = new Document({
text: text,
id_: id,
metadata: { title: id },
});
const pipeline = new IngestionPipeline({
transformations: [
new SimpleNodeParser(),
Settings.embedModel,
],
vectorStore,
});
  await pipeline.run({ documents: [document] });
}
export default {
init,
resetCollection,
query,
queryAgent,
ingestText,
};
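A minimal sketch of driving this module directly, assuming a reachable Qdrant instance; the sample text, URL, and question are hypothetical:

import ai from './lib/ai.js';

await ai.init(); // ensure the Qdrant collection exists
await ai.ingestText('Example wiki page text.', 'https://wiki.synchro.net/example'); // hypothetical document and id
const answer = await ai.queryAgent({ query: 'How do I configure the web server?' }); // hypothetical question
if (answer !== undefined) {
  console.log(answer.answer, answer.sources, answer.relevance);
}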

src/lib/config.ts

import dotenv from 'dotenv';
dotenv.config();
export default {
embeddingModel: process.env.OPENAI_EMBEDDING_MODEL ?? 'text-embedding-3-small',
embeddingDimension: parseInt(process.env.EMBEDDING_DIMENSION ?? '1536', 10),
  httpPort: parseInt(process.env.HTTP_PORT ?? '3000', 10),
llm: 'gpt-3.5-turbo-0125',
qdrantCollection: 'synchronet',
qdrantUrl: `http://${process.env.QDRANT_HOST ?? 'qdrant'}:${process.env.QDRANT_PORT ?? '6333'}`,
}
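A matching .env might look like the following; every value is illustrative, and OPENAI_API_KEY is an assumption here: config.ts does not read it, but the OpenAI client conventionally picks it up from the environment.

OPENAI_API_KEY=sk-...                         # hypothetical placeholder
OPENAI_EMBEDDING_MODEL=text-embedding-3-small
EMBEDDING_DIMENSION=1536
HTTP_PORT=3000
QDRANT_HOST=qdrant
QDRANT_PORT=6333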

src/lib/wiki.ts

import fs from 'fs';
import path from 'path';
import puppeteer from 'puppeteer';
import { ingestText, resetCollection } from './ai.js';
const BASE_URL = 'https://wiki.synchro.net';
const OUT_FILE = path.join(path.resolve(), 'data', 'wiki.json');
export async function extractData(): Promise<void> {
  if (fs.existsSync(OUT_FILE)) {
    // Skip the scrape entirely if the cached copy is less than 24 hours old
    const { mtime } = fs.statSync(OUT_FILE);
    if ((new Date()).getTime() - mtime.getTime() < (1000 * 60 * 60 * 24)) return;
  }
const browser = await puppeteer.launch({
executablePath: '/usr/bin/google-chrome',
args: [
'--no-sandbox',
],
});
const page = await browser.newPage();
await page.goto(`${BASE_URL}/wiki:site_index`);
await page.setViewport({ width: 1080, height: 1024 });
  const paths = await page.evaluate(() => {
    // querySelector can return null, so bail out gracefully instead of throwing
    const tree = document.querySelector('div > div.indexmenu_nojs > ul[role="tree"].idx');
    if (tree === null) return;
    const paths: string[] = [];
    for (const element of tree.getElementsByTagName('a')) {
      const href = element.getAttribute('href');
      if (href !== null) paths.push(href);
    }
    return paths;
  });
  if (paths === undefined) {
    await browser.close();
    return;
  }
  const pages: Record<string, string> = {};
  for (const p of paths) {
    if (p.search(/^\/es:/) === 0) continue; // skip the Spanish-language namespace
    if (p.search(/^\/wiki:/) === 0) continue; // skip wiki meta pages
    console.debug(p);
    const articlePage = await browser.newPage(); // renamed so it no longer shadows the index page above
    await articlePage.goto(`${BASE_URL}${p}`);
    await articlePage.setViewport({ width: 1080, height: 1024 });
    const content = await articlePage.evaluate(() => {
      const body = document.getElementById('bodyContent')?.innerHTML;
      if (body === undefined) return;
      const match = body.match(/<!-- start rendered wiki content -->([\s\S]*?)<!-- end rendered wiki content -->/);
      if (match === null) return;
      return match[1];
    });
    await articlePage.close(); // close each tab so long crawls don't accumulate open pages
    if (content === undefined) continue;
    pages[p] = content;
  }
fs.writeFileSync(OUT_FILE, JSON.stringify(pages));
await browser.close();
}
export async function embedData(): Promise<void> {
await extractData();
await resetCollection();
const wiki: Record<string, string> = JSON.parse(fs.readFileSync(OUT_FILE, 'utf8'));
  for (const page in wiki) {
    // The key is the wiki path, so appending it to BASE_URL yields the canonical source URL
    await ingestText(wiki[page], `${BASE_URL}${page}`);
  }
}
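Note that embedData resets the entire collection before re-ingesting, so a run that fails partway can leave the index incomplete until the next successful pass; the 24-hour cache in extractData at least avoids re-scraping the wiki on every call.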

tsconfig.json

{
"compilerOptions": {
"module": "ESNext",
"esModuleInterop": true,
"target": "ESNext",
"moduleResolution": "Bundler",
"sourceMap": true,
"outDir": "build",
"skipLibCheck": true
},
"include": ["src/**/*"],
"exclude": ["node_modules"]
}