Source

Target

Showing with 163 additions and 30 deletions
+163 -30
venv/
README.md
.env
.coverage
key.jwk
*.session
*.sqlite3
...@@ -3,4 +3,4 @@ WORKDIR /app ...@@ -3,4 +3,4 @@ WORKDIR /app
COPY requirements.txt . COPY requirements.txt .
RUN pip install -r requirements.txt RUN pip install -r requirements.txt
COPY . . COPY . .
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] CMD python ./wait_for_client.py && python ./app.py generate_key && uvicorn app:app --host 0.0.0.0 --port 8000
...@@ -29,26 +29,34 @@ Monolithic backend of IoC scraper service ...@@ -29,26 +29,34 @@ Monolithic backend of IoC scraper service
1. Install docker 1. Install docker
2. Save environment variables in .env file 2. Save environment variables in .env file
3. Run docker image 3. Run docker image with .env file mounted
```bash ```bash
docker run -d --env-file .env -p 8000:8000 --name backend registry.miem.hse.ru/indicators-parser-saas/backend docker run -d --env-file .env -p 8000:8000 --name backend -it registry.miem.hse.ru/indicators-parser-saas/backend
``` ```
4. In another terminal run client generation
```bash
docker exec -it backend python ./app.py
```
## Settings ## Settings
| **Name** | **Description** | **Is required?** | | **Name** | **Description** | **Is required?** |
|:----------------------:|:-------------------------------------------------------------------------------------------------------------------:|:----------------:| |:-----------------------:|:-------------------------------------------------------------------------------------------------------------------:|:----------------:|
| `DB_URI` | Database [URL](https://tortoise.github.io/databases.html) | ✅ | | `DB_URI` | Database [URL](https://tortoise.github.io/databases.html) | ✅ |
| `GITHUB_CLIENT_ID` | [GitHub OAuth](https://docs.github.com/en/developers/apps/building-oauth-apps/authorizing-oauth-apps) client id | ✅ | | `GITHUB_CLIENT_ID` | [GitHub OAuth](https://docs.github.com/en/developers/apps/building-oauth-apps/authorizing-oauth-apps) client id | ✅ |
| `GITHUB_CLIENT_SECRET` | [GitHub OAuth](https://docs.github.com/en/developers/apps/building-oauth-apps/authorizing-oauth-apps) client secret | ✅ | | `GITHUB_CLIENT_SECRET` | [GitHub OAuth](https://docs.github.com/en/developers/apps/building-oauth-apps/authorizing-oauth-apps) client secret | ✅ |
| `JWK_KEY_FILE` | RSA Private and Public key in Json Web Key format (key.jwk by default) | | | `JWK_KEY_FILE` | RSA Private and Public key in Json Web Key format (`key.jwk` by default) | |
| `S3_SECRET_KEY` | AWS S3 or S3 like storage static key secret | ✅ | | `S3_SECRET_KEY` | AWS S3 or S3 like storage static key secret | ✅ |
| `S3_ACCESS_KEY` | AWS S3 or S3 like storage static access key | ✅ | | `S3_ACCESS_KEY` | AWS S3 or S3 like storage static access key | ✅ |
| `S3_REGION_NAME` | AWS S3 or S3 like storage region name | ✅ | | `S3_REGION_NAME` | AWS S3 or S3 like storage region name | ✅ |
| `S3_ENDPOINT_URL` | AWS S3 or S3 like storage endpoint URL | ✅ | | `S3_ENDPOINT_URL` | AWS S3 or S3 like storage endpoint URL | ✅ |
| `S3_REPORTS_FOLDER` | AWS S3 or S3 like storage reports base path | | | `S3_REPORTS_FOLDER` | AWS S3 or S3 like storage reports base path | |
| `S3_BASE_URL` | Base URL for generating not signed URLs | ✅ | | `S3_BASE_URL` | Base URL for generating not signed URLs | ✅ |
| `TELEGRAM_API_ID` | Telegram API ID from [my.telegram.org](https://my.telegram.org) | ✅ |
| `TELEGRAM_API_HASH` | Telegram API hash from [my.telegram.org](https://my.telegram.org) | ✅ |
| `TELEGRAM_SESSION_NAME` | Telegram session file name (`ioc` by default) | |
## Documentation ## Documentation
`//TODO: Add documentation here` Documentation is powered by [Sphinx](https://www.sphinx-doc.org/en/master/) and available
at [Read The Docs](https://ioc-backend.readthedocs.io/en/latest/genindex.html)
...@@ -2,7 +2,6 @@ from os.path import exists ...@@ -2,7 +2,6 @@ from os.path import exists
from loguru import logger from loguru import logger
from starlette.applications import Starlette from starlette.applications import Starlette
from tortoise import Tortoise
from sys import argv from sys import argv
from tortoise.contrib.starlette import register_tortoise from tortoise.contrib.starlette import register_tortoise
...@@ -12,14 +11,9 @@ from auth.utils import create_and_save_key ...@@ -12,14 +11,9 @@ from auth.utils import create_and_save_key
from endpoints.routes import routes from endpoints.routes import routes
from responses.errors import ApiError, handle_api_error from responses.errors import ApiError, handle_api_error
from settings import DB_URI from settings import DB_URI
from integrations.telegram import client from integrations.telegram import start, client
app = Starlette(routes=routes, on_startup=[key.load_key, start],
async def start_client():
await client.start()
app = Starlette(routes=routes, on_startup=[key.load_key, start_client], on_shutdown=[client.disconnect],
exception_handlers={ exception_handlers={
ApiError: handle_api_error ApiError: handle_api_error
}) })
......
...@@ -4,4 +4,5 @@ from .user import get_me, create_bot_token, get_bot_tokens, delete_bot_token ...@@ -4,4 +4,5 @@ from .user import get_me, create_bot_token, get_bot_tokens, delete_bot_token
from .parser import load_report from .parser import load_report
from .indicators import get_indicators_from_group, get_indicator_groups from .indicators import get_indicators_from_group, get_indicator_groups
from .watcher import create_watcher from .watcher import create_watcher
from .parser import get_reports from .parser import get_reports, get_page_report
from .static import token
...@@ -27,7 +27,7 @@ async def github_oauth_callback(request: Request): ...@@ -27,7 +27,7 @@ async def github_oauth_callback(request: Request):
) )
await user.save() await user.save()
token = user.create_token() token = user.create_token()
resp = RedirectResponse("/", 301) resp = RedirectResponse("/token", 301)
resp.set_cookie("token", token) resp.set_cookie("token", token)
return resp return resp
......
...@@ -2,6 +2,7 @@ import os ...@@ -2,6 +2,7 @@ import os
from uuid import uuid4 from uuid import uuid4
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
from aiohttp import request as aiohttp_request
from starlette.background import BackgroundTasks from starlette.background import BackgroundTasks
from starlette.datastructures import UploadFile from starlette.datastructures import UploadFile
from starlette.requests import Request from starlette.requests import Request
...@@ -11,8 +12,8 @@ from models import Report, IndicatorGroup ...@@ -11,8 +12,8 @@ from models import Report, IndicatorGroup
from models.indicator import IndicatorGroupPD from models.indicator import IndicatorGroupPD
from models.report import ReportPD from models.report import ReportPD
from parsers.pdf_parser import process_pdf from parsers.pdf_parser import process_pdf
from parsers.text_parser import CollectedData from parsers.text_parser import CollectedData, find_ioc
from responses.errors import ReportNotPresented from responses.errors import ReportNotPresented, ReportURLError
from responses.responses import OkResponse from responses.responses import OkResponse
...@@ -50,3 +51,21 @@ async def get_reports(request: Request): ...@@ -50,3 +51,21 @@ async def get_reports(request: Request):
report.owner = request.state.user report.owner = request.state.user
as_pd.append(ReportPD.from_orm(report)) as_pd.append(ReportPD.from_orm(report))
return OkResponse(as_pd) return OkResponse(as_pd)
async def get_page_report(request: Request):
    """ Creates an indicator group from a news page.

    Downloads the page given in the ``url`` query parameter, runs ``find_ioc`` over its
    lower-cased text and saves the result as an indicator group for ``request.state.user``.

    :param request: incoming request carrying the ``url`` query parameter
    :return: ``OkResponse`` with the created group under the ``indicator_group`` key
    :raises ReportURLError: if ``url`` is missing or empty, the page can not be fetched
        or the response status is not 200
    """
    url = request.query_params.get("url")
    if not url:
        # A missing parameter would otherwise surface as a KeyError (HTTP 500)
        raise ReportURLError

    # NOTE(review): url is user-supplied and fetched from the server side;
    # consider restricting it to public http(s) hosts - confirm with security review
    try:
        async with aiohttp_request("GET", url) as resp:
            # Check the status first, so bodies of error responses are never read
            if resp.status != 200:
                raise ReportURLError
            page = await resp.text()
    except ReportURLError:
        raise
    except Exception as err:
        # Every fetch/decoding failure is reported to the caller as a bad URL,
        # the original cause is kept in the exception chain for logs
        raise ReportURLError from err

    data = find_ioc(page.lower())
    group = await IndicatorGroup.from_reports_collected_data(data, request.state.user)
    group.description = "Group from page " + url
    await group.save()
    group_pd = IndicatorGroupPD.from_orm(group)
    return OkResponse({"indicator_group": group_pd})
...@@ -7,7 +7,8 @@ from auth.middleware import JWTAuthenticationMiddleware ...@@ -7,7 +7,8 @@ from auth.middleware import JWTAuthenticationMiddleware
unauthenticated_routes = [ unauthenticated_routes = [
Route("/ping", ping, methods=["GET"]), Route("/ping", ping, methods=["GET"]),
Route("/login/github", github_oauth_redirect, methods=["GET"]), Route("/login/github", github_oauth_redirect, methods=["GET"]),
Route("/oauth/github", github_oauth_callback, methods=["GET"]) Route("/oauth/github", github_oauth_callback, methods=["GET"]),
Route("/token", token, methods=["GET"]),
] ]
api_routes = [ api_routes = [
...@@ -19,7 +20,8 @@ api_routes = [ ...@@ -19,7 +20,8 @@ api_routes = [
Route("/deleteBotToken", delete_bot_token, methods=["GET"]), Route("/deleteBotToken", delete_bot_token, methods=["GET"]),
Route("/createWatcher", create_watcher, methods=["POST"]), Route("/createWatcher", create_watcher, methods=["POST"]),
Route("/getIndicatorGroups", get_indicator_groups, methods=["GET"]), Route("/getIndicatorGroups", get_indicator_groups, methods=["GET"]),
Route("/getReports", get_reports, methods=["GET"]) Route("/getReports", get_reports, methods=["GET"]),
Route("/loadPageReport", get_page_report, methods=["GET"]),
] ]
admin_routes = [] admin_routes = []
......
from h11 import Request
from starlette.responses import HTMLResponse
# Static HTML page that displays the value of the "token" cookie to the user
token_page = """
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport"
content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Token create</title>
</head>
<body>
<div style="text-align: center;">
Created token: <span id="token"></span>
</div>
<script>
// Get token from cookie; the cookie is absent when the page is opened without logging in
const tokenCookie = document.cookie.split('; ').find(row => row.startsWith('token='));
if (tokenCookie) {
    // Cut the prefix off instead of splitting by '=', so a value containing '=' is not truncated
    document.getElementById('token').innerText = tokenCookie.slice('token='.length);
}
</script>
</body>
</html>
"""
async def token(request: Request):
    """ Serves the static HTML page stored in ``token_page`` """
    # NOTE(review): Request is imported from h11 in this module; starlette's Request
    # is presumably what is meant - confirm
    page_response = HTMLResponse(token_page)
    return page_response
...@@ -25,3 +25,8 @@ async def on_message(event: Message): ...@@ -25,3 +25,8 @@ async def on_message(event: Message):
owner_id=watcher.owner_id) owner_id=watcher.owner_id)
if len(created_indicators) > 0: if len(created_indicators) > 0:
await Indicator.bulk_create(created_indicators) await Indicator.bulk_create(created_indicators)
async def start():
    """ Awaits ``client.start()`` on the module-level Telegram client """
    await client.start()
...@@ -87,3 +87,10 @@ class TelegramInvalidLink(ApiError): ...@@ -87,3 +87,10 @@ class TelegramInvalidLink(ApiError):
code = 604 code = 604
description = "Telegram channel join link is invalid" description = "Telegram channel join link is invalid"
http_code = 400 http_code = 400
class ReportURLError(ApiError):
    """ Report page URL can not be used: it is invalid or did not answer with status 200 """
    code = 701
    description = "Report URL is invalid, or 200 status code is not returned"
    http_code = 422
...@@ -45,3 +45,16 @@ def test_github_oauth_redirect(sti): ...@@ -45,3 +45,16 @@ def test_github_oauth_redirect(sti):
redirect_url = urlparse(resp.headers["Location"]) redirect_url = urlparse(resp.headers["Location"])
assert redirect_url.path == "/login/oauth/authorize" assert redirect_url.path == "/login/oauth/authorize"
assert redirect_url.hostname == "github.com" assert redirect_url.hostname == "github.com"
def test_login_redirect_token_page(mocked, sti):
    """ Checks that the GitHub OAuth callback request results in a 200 HTML response """
    access_token_payload = {
        "access_token": "ghAAAA",
        "scope": "repo,gist",
        "token_type": "bearer"
    }
    github_user_payload = {"login": "krol", "id": 54}
    mocked.post("https://github.com/login/oauth/access_token", status=200, body=dumps(access_token_payload))
    mocked.get("https://api.github.com/user", status=200, body=dumps(github_user_payload))

    callback_resp = sti.get("/oauth/github?code=12345")

    assert callback_resp.status_code == 200
    assert callback_resp.headers["Content-Type"] == "text/html; charset=utf-8"
...@@ -107,3 +107,33 @@ def test_reports_endpoint(sti_auth, user, mts_report): ...@@ -107,3 +107,33 @@ def test_reports_endpoint(sti_auth, user, mts_report):
assert 1 == len(resp_reports.json()["data"]) assert 1 == len(resp_reports.json()["data"])
print(resp_reports.json()) print(resp_reports.json())
assert resp_reports.json()["data"][0]["id"] == report_id assert resp_reports.json()["data"][0]["id"] == report_id
def test_parse_link_report_empty(sti_auth):
    """ Loads the empty sample page and expects a group without indicators """
    page_url = "https://storage.yandexcloud.net/ivanprogramming/empty_report.html"

    load_resp = sti_auth.get(f"/api/loadPageReport?url={page_url}")
    assert load_resp.status_code == 200
    group_id = load_resp.json()["data"]["indicator_group"]["id"]

    indicators_resp = sti_auth.get(f"/api/getIndicatorsFromGroup?group_id={group_id}")
    assert indicators_resp.status_code == 200
    assert len(indicators_resp.json()["data"]["indicators"]) == 0
def test_parse_link_report(sti_auth):
    """ Loads the sample report page and expects a group with three indicators """
    page_url = "https://storage.yandexcloud.net/ivanprogramming/report.html"

    load_resp = sti_auth.get(f"/api/loadPageReport?url={page_url}")
    assert load_resp.status_code == 200
    group_id = load_resp.json()["data"]["indicator_group"]["id"]

    indicators_resp = sti_auth.get(f"/api/getIndicatorsFromGroup?group_id={group_id}")
    assert indicators_resp.status_code == 200
    assert len(indicators_resp.json()["data"]["indicators"]) == 3
from os.path import exists
from settings import TELEGRAM_SESSION_NAME
from time import sleep
if __name__ == '__main__':
    # Session name may be configured with or without the ".session" suffix
    if TELEGRAM_SESSION_NAME.endswith(".session"):
        session_file_name = TELEGRAM_SESSION_NAME
    else:
        session_file_name = f"{TELEGRAM_SESSION_NAME}.session"

    if exists(session_file_name):
        print("Session file already exists")
    else:
        # Poll until the session file shows up on disk
        while not exists(session_file_name):
            sleep(0.1)
        print("Session file created, waiting for user to enter phone number and login")
        # Fixed pause after the file appears, before the script exits
        sleep(60)