From b1300acb78cd41484f654e45a6bdcc7fdddba091 Mon Sep 17 00:00:00 2001 From: empty Date: Wed, 3 Dec 2025 16:47:13 +0800 Subject: [PATCH] =?UTF-8?q?chore:=20=E6=B7=BB=E5=8A=A0=20.gitignore,=20LIC?= =?UTF-8?q?ENSE,=20README.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 39 ++++++++++++++++++++++++++ LICENSE | 21 ++++++++++++++ README.md | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c1f9307 --- /dev/null +++ b/.gitignore @@ -0,0 +1,39 @@ +# Dependencies +node_modules/ +__pycache__/ +*.py[cod] +*$py.class +.venv/ +venv/ +env/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ +.DS_Store + +# Build outputs +dist/ +build/ +*.egg-info/ + +# Logs +*.log +npm-debug.log* + +# Environment +.env +.env.local +.env.*.local + +# Test +coverage/ +.pytest_cache/ + +# Temp files +*.tmp +*.temp +.cache/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d26f41a --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 let5see + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..a3231e6 --- /dev/null +++ b/README.md @@ -0,0 +1,82 @@ +# Web2MCP + +将网页内容转换为大模型友好格式的工具集。 + +## 项目结构 + +``` +web2mcp/ +├── browser-extension/ # Chrome 浏览器扩展 +│ ├── manifest.json # 扩展配置 +│ ├── popup.html # 弹出界面 +│ ├── popup.js # 弹出逻辑 +│ ├── content.js # 内容提取脚本 +│ ├── content.css # 样式 +│ └── icons/ # 图标 +├── mcp.py # Python 网页抓取脚本 +└── README.md +``` + +## 功能 + +### 浏览器扩展 + +一个 Chrome 扩展,用于截取网页内容并转换为结构化格式。 + +**特性:** +- 🎯 区域框选提取 +- 📄 整页内容提取 +- 📝 多格式输出(Markdown / JSON / XML) +- 📋 自动复制到剪贴板 + +**安装:** +1. 打开 Chrome,访问 `chrome://extensions/` +2. 开启右上角 **开发者模式** +3. 点击 **加载已解压的扩展程序** +4. 选择 `browser-extension` 文件夹 + +### Python 脚本 + +用于抓取网页并提取结构化内容。 + +```bash +python mcp.py +``` + +## 输出格式示例 + +### Markdown +```markdown +# 标题 + +这是一段文字内容。 + +- 列表项 1 +- 列表项 2 +``` + +### JSON +```json +[ + { + "type": "heading", + "level": 1, + "content": "标题" + }, + { + "type": "paragraph", + "content": "这是一段文字内容。" + } +] +``` + +## 依赖 + +**Python:** +```bash +pip install requests beautifulsoup4 +``` + +## License + +[MIT](LICENSE)