mirror of
https://github.com/severian-dev/sucker.severian.dev.git
synced 2025-10-27 20:25:47 +00:00
notebook no longer needed
This commit is contained in:
@@ -1,290 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {
|
|
||||||
"id": "zMVSOtXUASM8"
|
|
||||||
},
|
|
||||||
"source": [
|
|
||||||
"## **Card Definition Extractor**\n",
|
|
||||||
"\n",
|
|
||||||
"Standalone version with directions: https://sucker.severian.dev\n",
|
|
||||||
"\n",
|
|
||||||
"I've gotten into making models at [trashpanda-org](https://huggingface.co/trashpanda-org), check out hasnonname's [Mullein](https://huggingface.co/trashpanda-org/MS-24B-Mullein-v0)!\n",
|
|
||||||
"\n",
|
|
||||||
"> _lmk on Discord if you have any issues while using this - Severian_\n",
|
|
||||||
"\n",
|
|
||||||
"---\n",
|
|
||||||
"\n",
|
|
||||||
"**Changelog:**\n",
|
|
||||||
"- v0.2: fixed to handle Janitor making changes due to R1 handling.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"colab": {
|
|
||||||
"background_save": true,
|
|
||||||
"base_uri": "https://localhost:8080/"
|
|
||||||
},
|
|
||||||
"id": "a0pFE9KCDh8P",
|
|
||||||
"outputId": "d647688d-e541-4e5f-e13d-4b385ee84d8b"
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Requirement already satisfied: flask-cors in /usr/local/lib/python3.11/dist-packages (5.0.0)\n",
|
|
||||||
"Requirement already satisfied: Flask>=0.9 in /usr/local/lib/python3.11/dist-packages (from flask-cors) (3.1.0)\n",
|
|
||||||
"Requirement already satisfied: Werkzeug>=3.1 in /usr/local/lib/python3.11/dist-packages (from Flask>=0.9->flask-cors) (3.1.3)\n",
|
|
||||||
"Requirement already satisfied: Jinja2>=3.1.2 in /usr/local/lib/python3.11/dist-packages (from Flask>=0.9->flask-cors) (3.1.5)\n",
|
|
||||||
"Requirement already satisfied: itsdangerous>=2.2 in /usr/local/lib/python3.11/dist-packages (from Flask>=0.9->flask-cors) (2.2.0)\n",
|
|
||||||
"Requirement already satisfied: click>=8.1.3 in /usr/local/lib/python3.11/dist-packages (from Flask>=0.9->flask-cors) (8.1.8)\n",
|
|
||||||
"Requirement already satisfied: blinker>=1.9 in /usr/local/lib/python3.11/dist-packages (from Flask>=0.9->flask-cors) (1.9.0)\n",
|
|
||||||
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from Jinja2>=3.1.2->Flask>=0.9->flask-cors) (3.0.2)\n",
|
|
||||||
"Requirement already satisfied: flask-cors in /usr/local/lib/python3.11/dist-packages (5.0.0)\n",
|
|
||||||
"Requirement already satisfied: flask_cloudflared in /usr/local/lib/python3.11/dist-packages (0.0.14)\n",
|
|
||||||
"Requirement already satisfied: Flask>=0.9 in /usr/local/lib/python3.11/dist-packages (from flask-cors) (3.1.0)\n",
|
|
||||||
"Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from flask_cloudflared) (2.32.3)\n",
|
|
||||||
"Requirement already satisfied: Werkzeug>=3.1 in /usr/local/lib/python3.11/dist-packages (from Flask>=0.9->flask-cors) (3.1.3)\n",
|
|
||||||
"Requirement already satisfied: Jinja2>=3.1.2 in /usr/local/lib/python3.11/dist-packages (from Flask>=0.9->flask-cors) (3.1.5)\n",
|
|
||||||
"Requirement already satisfied: itsdangerous>=2.2 in /usr/local/lib/python3.11/dist-packages (from Flask>=0.9->flask-cors) (2.2.0)\n",
|
|
||||||
"Requirement already satisfied: click>=8.1.3 in /usr/local/lib/python3.11/dist-packages (from Flask>=0.9->flask-cors) (8.1.8)\n",
|
|
||||||
"Requirement already satisfied: blinker>=1.9 in /usr/local/lib/python3.11/dist-packages (from Flask>=0.9->flask-cors) (1.9.0)\n",
|
|
||||||
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->flask_cloudflared) (3.4.1)\n",
|
|
||||||
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->flask_cloudflared) (3.10)\n",
|
|
||||||
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->flask_cloudflared) (2.3.0)\n",
|
|
||||||
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->flask_cloudflared) (2024.12.14)\n",
|
|
||||||
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from Jinja2>=3.1.2->Flask>=0.9->flask-cors) (3.0.2)\n",
|
|
||||||
" * Serving Flask app '__main__'\n",
|
|
||||||
" * Debug mode: off\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"INFO:werkzeug:\u001b[31m\u001b[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.\u001b[0m\n",
|
|
||||||
" * Running on http://127.0.0.1:5000\n",
|
|
||||||
"INFO:werkzeug:\u001b[33mPress CTRL+C to quit\u001b[0m\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
" * Running on https://little-disputes-posting-palmer.trycloudflare.com\n",
|
|
||||||
" * Traffic stats available on http://127.0.0.1:8396/metrics\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"INFO:werkzeug:127.0.0.1 - - [04/Feb/2025 22:53:13] \"OPTIONS / HTTP/1.1\" 200 -\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Card definition JSON created at: /tmp/tmpynlda8kv.json\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"application/javascript": "\n async function download(id, filename, size) {\n if (!google.colab.kernel.accessAllowed) {\n return;\n }\n const div = document.createElement('div');\n const label = document.createElement('label');\n label.textContent = `Downloading \"${filename}\": `;\n div.appendChild(label);\n const progress = document.createElement('progress');\n progress.max = size;\n div.appendChild(progress);\n document.body.appendChild(div);\n\n const buffers = [];\n let downloaded = 0;\n\n const channel = await google.colab.kernel.comms.open(id);\n // Send a message to notify the kernel that we're ready.\n channel.send({})\n\n for await (const message of channel.messages) {\n // Send a message to notify the kernel that we're ready.\n channel.send({})\n if (message.buffers) {\n for (const buffer of message.buffers) {\n buffers.push(buffer);\n downloaded += buffer.byteLength;\n progress.value = downloaded;\n }\n }\n }\n const blob = new Blob(buffers, {type: 'application/binary'});\n const a = document.createElement('a');\n a.href = window.URL.createObjectURL(blob);\n a.download = filename;\n div.appendChild(a);\n a.click();\n div.remove();\n }\n ",
|
|
||||||
"text/plain": [
|
|
||||||
"<IPython.core.display.Javascript object>"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "display_data"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"application/javascript": "download(\"download_8462ad82-aca0-40a4-8cbb-0342ff5a7e1c\", \"tmpynlda8kv.json\", 14791)",
|
|
||||||
"text/plain": [
|
|
||||||
"<IPython.core.display.Javascript object>"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "display_data"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"INFO:werkzeug:127.0.0.1 - - [04/Feb/2025 22:53:14] \"POST / HTTP/1.1\" 200 -\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# @title Card Definition Extractor\n",
|
|
||||||
"\n",
|
|
||||||
"# @markdown Directions for use:\n",
|
|
||||||
"# @markdown - If enabled, starts the proxy in character card extraction mode.\n",
|
|
||||||
"# @markdown - Use the proxy as normal, and start a new chat with your character of choice.\n",
|
|
||||||
"# @markdown - After sending the first message, the proxy will process the character card in v1 format\n",
|
|
||||||
"# @markdown - Stop the proxy and Colab will download the JSON file on your device\n",
|
|
||||||
"# @markdown - Your custom prompt will appear on the description field so this is best used with a cleared-out custom prompt section on janitor.ai\n",
|
|
||||||
"# @markdown - You can start multiple new chats and send messages for the extractor to capture cards, and when you stop the notebook, it will download all extracted files at once.\n",
|
|
||||||
"\n",
|
|
||||||
"# @markdown **Select Tunnel Provider**\n",
|
|
||||||
"tunnel_provider = \"Cloudflare\" # @param [\"Cloudflare\", \"Localtunnel\", \"Ngrok\"]\n",
|
|
||||||
"\n",
|
|
||||||
"# @markdown **Ngrok Auth Token**: If using ngrok, sign up for an auth token at https://dashboard.ngrok.com/signup\n",
|
|
||||||
"ngrok_auth_token = \"\" # @param {type:\"string\"}\n",
|
|
||||||
"\n",
|
|
||||||
"card_definition_extractor = True\n",
|
|
||||||
"!pip install flask-cors\n",
|
|
||||||
"\n",
|
|
||||||
"import json\n",
|
|
||||||
"import requests\n",
|
|
||||||
"import time\n",
|
|
||||||
"from flask import Flask, request, jsonify\n",
|
|
||||||
"from flask_cors import CORS\n",
|
|
||||||
"import re\n",
|
|
||||||
"import tempfile\n",
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"app = Flask(__name__)\n",
|
|
||||||
"CORS(app)\n",
|
|
||||||
"\n",
|
|
||||||
"# Depending on the provider, set up the tunnel\n",
|
|
||||||
"if tunnel_provider == \"Cloudflare\":\n",
|
|
||||||
" !pip install flask-cors flask_cloudflared\n",
|
|
||||||
" from flask_cloudflared import run_with_cloudflared\n",
|
|
||||||
" run_with_cloudflared(app)\n",
|
|
||||||
"elif tunnel_provider == \"Localtunnel\":\n",
|
|
||||||
" !pip install flask-cors flask_localtunnel\n",
|
|
||||||
" from flask_lt import run_with_lt\n",
|
|
||||||
" run_with_lt(app)\n",
|
|
||||||
"elif tunnel_provider == \"Ngrok\":\n",
|
|
||||||
" !pip install flask-cors pyngrok==7.1.2\n",
|
|
||||||
" from pyngrok import ngrok\n",
|
|
||||||
" if ngrok_auth_token.strip():\n",
|
|
||||||
" ngrok.set_auth_token(ngrok_auth_token.strip())\n",
|
|
||||||
" public_url = ngrok.connect(5000).public_url\n",
|
|
||||||
" print(\"Public URL:\", public_url)\n",
|
|
||||||
"\n",
|
|
||||||
"def extract_between_tags(content, tag):\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Extracts content between XML-like tags.\n",
|
|
||||||
" Returns empty string if tag not found.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" start_tag = f\"<{tag}>\"\n",
|
|
||||||
" end_tag = f\"</{tag}>\"\n",
|
|
||||||
" start_idx = content.find(start_tag)\n",
|
|
||||||
" if start_idx == -1:\n",
|
|
||||||
" return \"\"\n",
|
|
||||||
" \n",
|
|
||||||
" end_idx = content.find(end_tag, start_idx)\n",
|
|
||||||
" if end_idx == -1:\n",
|
|
||||||
" return \"\"\n",
|
|
||||||
" \n",
|
|
||||||
" return content[start_idx + len(start_tag):end_idx].strip()\n",
|
|
||||||
"\n",
|
|
||||||
"def find_tags_between(content, start_marker, end_marker):\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Finds all XML-like tags and their content between two marker tags.\n",
|
|
||||||
" Returns list of {tag, content} dictionaries.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" start_idx = content.find(f\"<{start_marker}>\")\n",
|
|
||||||
" if start_idx == -1:\n",
|
|
||||||
" return []\n",
|
|
||||||
" \n",
|
|
||||||
" end_idx = content.find(f\"<{end_marker}>\")\n",
|
|
||||||
" if end_idx == -1:\n",
|
|
||||||
" return []\n",
|
|
||||||
" \n",
|
|
||||||
" section = content[start_idx + len(start_marker) + 2:end_idx]\n",
|
|
||||||
" tag_regex = r\"<([^/>]+)>([^<]+)</\\1>\"\n",
|
|
||||||
" matches = re.finditer(tag_regex, section)\n",
|
|
||||||
" \n",
|
|
||||||
" return [{\"tag\": match.group(1), \"content\": match.group(2).strip()} for match in matches]\n",
|
|
||||||
"\n",
|
|
||||||
"def extract_card_data(messages):\n",
|
|
||||||
" content0 = messages[0][\"content\"]\n",
|
|
||||||
" content1 = messages[2][\"content\"]\n",
|
|
||||||
"\n",
|
|
||||||
" # Find all persona tags between system and scenario, take the last one as character\n",
|
|
||||||
" personas = find_tags_between(content0, \"system\", \"scenario\")\n",
|
|
||||||
" char_persona = personas[-1] if personas else {\"tag\": \"\", \"content\": \"\"}\n",
|
|
||||||
" char_name = char_persona[\"tag\"]\n",
|
|
||||||
"\n",
|
|
||||||
" card_data = {\n",
|
|
||||||
" \"name\": char_name,\n",
|
|
||||||
" \"description\": char_persona[\"content\"],\n",
|
|
||||||
" \"scenario\": extract_between_tags(content0, \"scenario\"),\n",
|
|
||||||
" \"mes_example\": extract_between_tags(content0, \"example_dialogs\"),\n",
|
|
||||||
" \"personality\": \"\", # This field isn't used in the new format\n",
|
|
||||||
" \"first_mes\": content1\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" # Replace character name with placeholder in all fields\n",
|
|
||||||
" def safe_replace(text, old, new):\n",
|
|
||||||
" return text.replace(old, new) if old else text\n",
|
|
||||||
"\n",
|
|
||||||
" for field in card_data:\n",
|
|
||||||
" if field != \"name\": # Exclude the \"name\" field\n",
|
|
||||||
" val = card_data[field]\n",
|
|
||||||
" val = safe_replace(val, char_name, \"{{char}}\")\n",
|
|
||||||
" card_data[field] = val\n",
|
|
||||||
"\n",
|
|
||||||
" return card_data\n",
|
|
||||||
"\n",
|
|
||||||
"@app.route('/', methods=['GET'])\n",
|
|
||||||
"def default():\n",
|
|
||||||
" return {\"status\": \"online\"}\n",
|
|
||||||
"\n",
|
|
||||||
"@app.route('/', methods=['POST'])\n",
|
|
||||||
"def process_card():\n",
|
|
||||||
" body = request.json\n",
|
|
||||||
" if 'messages' not in body:\n",
|
|
||||||
" return jsonify(error=\"Missing 'messages' in request body\"), 400\n",
|
|
||||||
"\n",
|
|
||||||
" if card_definition_extractor and len(body[\"messages\"]) >= 2:\n",
|
|
||||||
" card_data = extract_card_data(body[\"messages\"])\n",
|
|
||||||
" # If running in Colab, download the file\n",
|
|
||||||
" try:\n",
|
|
||||||
" from google.colab import files\n",
|
|
||||||
" import tempfile\n",
|
|
||||||
" temp_json = tempfile.NamedTemporaryFile(delete=False, suffix=\".json\")\n",
|
|
||||||
" with open(temp_json.name, 'w', encoding='utf-8') as f:\n",
|
|
||||||
" json.dump(card_data, f, ensure_ascii=False, indent=2)\n",
|
|
||||||
" print(\"Card definition JSON created at:\", temp_json.name)\n",
|
|
||||||
" files.download(temp_json.name)\n",
|
|
||||||
" except ImportError:\n",
|
|
||||||
" pass # Not in Colab, just return JSON\n",
|
|
||||||
"\n",
|
|
||||||
" return jsonify(card_data), 200\n",
|
|
||||||
" else:\n",
|
|
||||||
" return jsonify(status=\"Card definition extractor not enabled or insufficient messages\"), 200\n",
|
|
||||||
"\n",
|
|
||||||
"if __name__ == '__main__':\n",
|
|
||||||
" if tunnel_provider != \"Cloudflare\":\n",
|
|
||||||
" print('\\n Colab IP: ', end='')\n",
|
|
||||||
" !curl ipecho.net/plain\n",
|
|
||||||
" print('\\n')\n",
|
|
||||||
" app.run()\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"colab": {
|
|
||||||
"provenance": []
|
|
||||||
},
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"name": "python"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 0
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user