📦 langgenius / dify

📄 disable_segment_from_index_task.py · 76 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import logging
import time

import click
from celery import shared_task

from core.db.session_factory import session_factory
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from extensions.ext_redis import redis_client
from models.dataset import DocumentSegment

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


@shared_task(queue="dataset")
def disable_segment_from_index_task(segment_id: str):
    """
    Async disable segment from index.

    Loads the segment and, when the segment and its parent document are both in a
    usable state, removes the segment's index node from the dataset index through the
    index processor selected by the document's ``doc_form``.

    If anything raises while removing the node, the error is logged, the segment is
    flagged ``enabled = True`` again and that change is committed. Once the segment has
    been found, the ``segment_<id>_indexing`` Redis key is deleted on every exit path.

    :param segment_id: id of the DocumentSegment to remove from the index
    :return: None

    Usage: disable_segment_from_index_task.delay(segment_id)
    """
    logger.info(click.style(f"Start disable segment from index: {segment_id}", fg="green"))
    start_at = time.perf_counter()

    with session_factory.create_session() as session:
        segment = session.query(DocumentSegment).where(DocumentSegment.id == segment_id).first()
        if not segment:
            logger.info(click.style(f"Segment not found: {segment_id}", fg="red"))
            return

        # NOTE(review): presumably the caller sets this key as an "operation in progress"
        # marker before dispatching the task - confirm against the dispatching service.
        indexing_cache_key = f"segment_{segment.id}_indexing"

        try:
            # This guard lives inside the try block on purpose: the `finally` clause
            # below then clears the cache key for this bail-out too, exactly as it
            # already does for the other early returns.
            if segment.status != "completed":
                logger.info(click.style(f"Segment is not completed, disable is not allowed: {segment_id}", fg="red"))
                return

            dataset = segment.dataset

            if not dataset:
                logger.info(click.style(f"Segment {segment.id} has no dataset, pass.", fg="cyan"))
                return

            dataset_document = segment.document

            if not dataset_document:
                logger.info(click.style(f"Segment {segment.id} has no document, pass.", fg="cyan"))
                return

            # Only touch the index when the parent document is enabled, not archived
            # and has finished indexing.
            if (
                not dataset_document.enabled
                or dataset_document.archived
                or dataset_document.indexing_status != "completed"
            ):
                logger.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan"))
                return

            index_type = dataset_document.doc_form
            index_processor = IndexProcessorFactory(index_type).init_index_processor()
            index_processor.clean(dataset, [segment.index_node_id])

            end_at = time.perf_counter()
            logger.info(
                click.style(
                    f"Segment removed from index: {segment.id} latency: {end_at - start_at}",
                    fg="green",
                )
            )
        except Exception:
            # Top-level task boundary: log with traceback and flag the segment as
            # enabled again, since it was not removed from the index.
            logger.exception("remove segment from index failed")
            segment.enabled = True
            session.commit()
        finally:
            redis_client.delete(indexing_cache_key)