Source

Target

Showing with 163 additions and 30 deletions
+163 -30
venv/
README.md
.env
.coverage
key.jwk
*.session
*.sqlite3
......@@ -3,4 +3,4 @@ WORKDIR /app
COPY requirements.txt .
RUN pip install -r requirements.txt
COPY . .
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
CMD python ./wait_for_client.py && python ./app.py generate_key && uvicorn app:app --host 0.0.0.0 --port 8000
......@@ -29,26 +29,34 @@ Monolithic backend of IoC scraper service
1. Install docker
2. Save environment variables in a .env file
3. Run docker image
3. Run docker image with .env file mounted
```bash
docker run -d --env-file .env -p 8000:8000 --name backend registry.miem.hse.ru/indicators-parser-saas/backend
docker run -d --env-file .env -p 8000:8000 --name backend -it registry.miem.hse.ru/indicators-parser-saas/backend
```
4. In another terminal run client generation
```bash
docker exec -it backend python ./app.py
```
## Settings
| **Name** | **Description** | **Is required?** |
|:----------------------:|:-------------------------------------------------------------------------------------------------------------------:|:----------------:|
| `DB_URI` | Database [URL](https://tortoise.github.io/databases.html) | ✅ |
| `GITHUB_CLIENT_ID` | [GitHub OAuth](https://docs.github.com/en/developers/apps/building-oauth-apps/authorizing-oauth-apps) client id | ✅ |
| `GITHUB_CLIENT_SECRET` | [GitHub OAuth](https://docs.github.com/en/developers/apps/building-oauth-apps/authorizing-oauth-apps) client secret | ✅ |
| `JWK_KEY_FILE` | RSA Private and Public key in Json Web Key format (key.jwk by default) | |
| `S3_SECRET_KEY` | AWS S3 or S3 like storage static key secret | ✅ |
| `S3_ACCESS_KEY` | AWS S3 or S3 like storage static access key | ✅ |
| `S3_REGION_NAME` | AWS S3 or S3 like storage region name | ✅ |
| `S3_ENDPOINT_URL` | AWS S3 or S3 like storage endpoint URL | ✅ |
| `S3_REPORTS_FOLDER` | AWS S3 or S3 like storage reports base path | |
| `S3_BASE_URL` | Base URL for generating not signed URLs | ✅ |
| **Name** | **Description** | **Is required?** |
|:-----------------------:|:-------------------------------------------------------------------------------------------------------------------:|:----------------:|
| `DB_URI` | Database [URL](https://tortoise.github.io/databases.html) | ✅ |
| `GITHUB_CLIENT_ID` | [GitHub OAuth](https://docs.github.com/en/developers/apps/building-oauth-apps/authorizing-oauth-apps) client id | ✅ |
| `GITHUB_CLIENT_SECRET` | [GitHub OAuth](https://docs.github.com/en/developers/apps/building-oauth-apps/authorizing-oauth-apps) client secret | ✅ |
| `JWK_KEY_FILE` | RSA private and public key in JSON Web Key format (`key.jwk` by default) | |
| `S3_SECRET_KEY` | AWS S3 or S3 like storage static key secret | ✅ |
| `S3_ACCESS_KEY` | AWS S3 or S3 like storage static access key | ✅ |
| `S3_REGION_NAME` | AWS S3 or S3 like storage region name | ✅ |
| `S3_ENDPOINT_URL` | AWS S3 or S3 like storage endpoint URL | ✅ |
| `S3_REPORTS_FOLDER` | AWS S3 or S3 like storage reports base path | |
| `S3_BASE_URL` | Base URL for generating unsigned URLs | ✅ |
| `TELEGRAM_API_ID` | Telegram API ID from [my.telegram.org](https://my.telegram.org) | ✅ |
| `TELEGRAM_API_HASH` | Telegram API hash from [my.telegram.org](https://my.telegram.org) | ✅ |
| `TELEGRAM_SESSION_NAME` | Telegram session file name (`ioc` by default) | |
## Documentation
`//TODO: Add documentation here`
Documentation is powered by [Sphinx](https://www.sphinx-doc.org/en/master/) and available
at [Read The Docs](https://ioc-backend.readthedocs.io/en/latest/genindex.html)
......@@ -2,7 +2,6 @@ from os.path import exists
from loguru import logger
from starlette.applications import Starlette
from tortoise import Tortoise
from sys import argv
from tortoise.contrib.starlette import register_tortoise
......@@ -12,14 +11,9 @@ from auth.utils import create_and_save_key
from endpoints.routes import routes
from responses.errors import ApiError, handle_api_error
from settings import DB_URI
from integrations.telegram import client
from integrations.telegram import start, client
async def start_client():
await client.start()
app = Starlette(routes=routes, on_startup=[key.load_key, start_client], on_shutdown=[client.disconnect],
app = Starlette(routes=routes, on_startup=[key.load_key, start],
exception_handlers={
ApiError: handle_api_error
})
......
......@@ -4,4 +4,5 @@ from .user import get_me, create_bot_token, get_bot_tokens, delete_bot_token
from .parser import load_report
from .indicators import get_indicators_from_group, get_indicator_groups
from .watcher import create_watcher
from .parser import get_reports
from .parser import get_reports, get_page_report
from .static import token
......@@ -27,7 +27,7 @@ async def github_oauth_callback(request: Request):
)
await user.save()
token = user.create_token()
resp = RedirectResponse("/", 301)
resp = RedirectResponse("/token", 301)
resp.set_cookie("token", token)
return resp
......
......@@ -2,6 +2,7 @@ import os
from uuid import uuid4
from tempfile import NamedTemporaryFile
from aiohttp import request as aiohttp_request
from starlette.background import BackgroundTasks
from starlette.datastructures import UploadFile
from starlette.requests import Request
......@@ -11,8 +12,8 @@ from models import Report, IndicatorGroup
from models.indicator import IndicatorGroupPD
from models.report import ReportPD
from parsers.pdf_parser import process_pdf
from parsers.text_parser import CollectedData
from responses.errors import ReportNotPresented
from parsers.text_parser import CollectedData, find_ioc
from responses.errors import ReportNotPresented, ReportURLError
from responses.responses import OkResponse
......@@ -50,3 +51,21 @@ async def get_reports(request: Request):
report.owner = request.state.user
as_pd.append(ReportPD.from_orm(report))
return OkResponse(as_pd)
async def get_page_report(request: Request):
    """ Creates an indicator group from the IoCs found on a web page

    Query parameters:
        url: address of the page to download and scan.

    Returns:
        OkResponse whose payload holds the created group under the
        ``indicator_group`` key.

    Raises:
        ReportURLError: if ``url`` is missing or empty, the page cannot be
            downloaded, or the server answers with a status other than 200.
    """
    url = request.query_params.get("url")
    if not url:
        # Report a missing parameter as a regular API error instead of
        # letting a bare KeyError escape from the query-params lookup.
        raise ReportURLError

    # NOTE(review): the URL is caller-supplied and is fetched from the server
    # side; presumably internal addresses should be rejected (SSRF) - confirm.
    try:
        async with aiohttp_request("GET", url) as resp:
            # Check the status first so error bodies are never downloaded.
            if resp.status != 200:
                raise ReportURLError
            page = await resp.text()
    except ReportURLError:
        raise
    except Exception as err:
        # Any download or decoding failure is reported to the client as an
        # invalid report URL; the original cause is kept for the logs.
        raise ReportURLError from err

    data = find_ioc(page.lower())
    group = await IndicatorGroup.from_reports_collected_data(data, request.state.user)
    group.description = "Group from page " + url
    await group.save()
    group_pd = IndicatorGroupPD.from_orm(group)
    return OkResponse({"indicator_group": group_pd})
......@@ -7,7 +7,8 @@ from auth.middleware import JWTAuthenticationMiddleware
unauthenticated_routes = [
Route("/ping", ping, methods=["GET"]),
Route("/login/github", github_oauth_redirect, methods=["GET"]),
Route("/oauth/github", github_oauth_callback, methods=["GET"])
Route("/oauth/github", github_oauth_callback, methods=["GET"]),
Route("/token", token, methods=["GET"]),
]
api_routes = [
......@@ -19,7 +20,8 @@ api_routes = [
Route("/deleteBotToken", delete_bot_token, methods=["GET"]),
Route("/createWatcher", create_watcher, methods=["POST"]),
Route("/getIndicatorGroups", get_indicator_groups, methods=["GET"]),
Route("/getReports", get_reports, methods=["GET"])
Route("/getReports", get_reports, methods=["GET"]),
Route("/loadPageReport", get_page_report, methods=["GET"]),
]
admin_routes = []
......
from h11 import Request
from starlette.responses import HTMLResponse
# HTML page returned by the `token` endpoint: a small script reads the
# "token" cookie in the browser and prints its value into the page.
token_page = """
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport"
content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Token create</title>
</head>
<body>
<div style="text-align: center;">
Created token: <span id="token"></span>
</div>
<script>
// Get token from cookie; the cookie is absent when the page is opened directly
const tokenRow = document.cookie.split('; ').find(row => row.startsWith('token='));
// Keep everything after "token=" so values containing "=" are not truncated
const token = tokenRow ? tokenRow.substring('token='.length) : '';
if (token) {
document.getElementById('token').innerText = token;
}
</script>
</body>
</html>
"""
async def token(request: Request):
    """ Serves the static HTML page that displays the token read from the cookie """
    # NOTE(review): `Request` is imported from h11 in this module; the other
    # endpoints annotate with starlette.requests.Request - confirm and align.
    page_response = HTMLResponse(token_page)
    return page_response
......@@ -25,3 +25,8 @@ async def on_message(event: Message):
owner_id=watcher.owner_id)
if len(created_indicators) > 0:
await Indicator.bulk_create(created_indicators)
async def start():
    """ Startup hook: brings the module-level Telegram client online """
    pending_start = client.start()
    await pending_start
......@@ -87,3 +87,10 @@ class TelegramInvalidLink(ApiError):
code = 604
description = "Telegram channel join link is invalid"
http_code = 400
class ReportURLError(ApiError):
    """ The page behind a report URL could not be fetched with a 200 response """
    http_code = 422
    code = 701
    description = "Report URL is invalid, or 200 status code is not returned"
......@@ -45,3 +45,16 @@ def test_github_oauth_redirect(sti):
redirect_url = urlparse(resp.headers["Location"])
assert redirect_url.path == "/login/oauth/authorize"
assert redirect_url.hostname == "github.com"
def test_login_redirect_token_page(mocked, sti):
    """ The GitHub OAuth callback must end with a 200 HTML response

    Presumably the test client follows the redirect to the token page -
    confirm against the `sti` fixture.
    """
    access_token_reply = dumps({
        "access_token": "ghAAAA",
        "scope": "repo,gist",
        "token_type": "bearer"
    })
    github_user_reply = dumps({"login": "krol", "id": 54})

    # Stub both GitHub calls made by the callback handler
    mocked.post("https://github.com/login/oauth/access_token", status=200, body=access_token_reply)
    mocked.get("https://api.github.com/user", status=200, body=github_user_reply)

    callback_response = sti.get("/oauth/github?code=12345")

    assert callback_response.status_code == 200
    assert callback_response.headers["Content-Type"] == "text/html; charset=utf-8"
......@@ -107,3 +107,33 @@ def test_reports_endpoint(sti_auth, user, mts_report):
assert 1 == len(resp_reports.json()["data"])
print(resp_reports.json())
assert resp_reports.json()["data"][0]["id"] == report_id
def _load_indicators_from_page(sti_auth, page_url):
    """ Loads a page report through the API and returns the indicators of the created group

    Asserts that both API calls answer with status 200.
    """
    # NOTE(review): these URLs appear to be fetched over the real network
    # unless the fixture stubs outgoing requests - confirm.
    resp = sti_auth.get(f"/api/loadPageReport?url={page_url}")
    assert resp.status_code == 200
    group_id = resp.json()["data"]["indicator_group"]["id"]

    resp = sti_auth.get(f"/api/getIndicatorsFromGroup?group_id={group_id}")
    assert resp.status_code == 200
    return resp.json()["data"]["indicators"]


def test_parse_link_report_empty(sti_auth):
    """ A page without IoCs yields a group with no indicators """
    test_url = "https://storage.yandexcloud.net/ivanprogramming/empty_report.html"
    indicators = _load_indicators_from_page(sti_auth, test_url)
    assert len(indicators) == 0


def test_parse_link_report(sti_auth):
    """ The sample report page yields a group with three indicators """
    test_url = "https://storage.yandexcloud.net/ivanprogramming/report.html"
    indicators = _load_indicators_from_page(sti_auth, test_url)
    assert len(indicators) == 3
from os.path import exists
from settings import TELEGRAM_SESSION_NAME
from time import sleep
if __name__ == '__main__':
    # Accept the configured session name with or without the ".session" suffix
    session_file_name = TELEGRAM_SESSION_NAME
    if not session_file_name.endswith(".session"):
        session_file_name = f"{session_file_name}.session"

    if not exists(session_file_name):
        # Poll until the session file shows up (no timeout), then leave the
        # user a minute to finish logging in before the script exits.
        while not exists(session_file_name):
            sleep(0.1)
        print("Session file created, waiting for user to enter phone number and login")
        sleep(60)
    else:
        print("Session file already exists")