Add torch_gc to clear GPU cache in knowledge_based_chatglm.py

c4b52dda · littlepanda0716 · 5664d1ff · c4b52dda
--- a/chatglm_llm.py
+++ b/chatglm_llm.py
@@ -52,6 +52,7 @@ class ChatGLM(LLM):
            max_length=self.max_token,
            temperature=self.temperature,
        )
+        torch_gc()
        print("history: ", self.history)
        if stop is not None:
            response = enforce_stop_tokens(response, stop)