#!/usr/bin/env python3
import json
import hashlib
import queue
import re
import threading
import time
from pathlib import Path
import tkinter as tk
from tkinter import filedialog, messagebox, ttk

import requests

# Base URL shared by every VirusTotal API v3 request built in this module.
VT_API_BASE = 'https://www.virustotal.com/api/v3'
# Endpoint upload_file() posts to directly when a file is at most 32 MiB.
PUBLIC_UPLOAD_URL = f'{VT_API_BASE}/files'
# Endpoints get_upload_url() queries to obtain the upload target for larger
# files: the private-scanning variant and the public variant respectively.
PRIVATE_LARGE_UPLOAD_URL = f'{VT_API_BASE}/private/files/upload_url'
PUBLIC_LARGE_UPLOAD_URL = f'{VT_API_BASE}/files/upload_url'


def sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is consumed in 1 MiB blocks so that large files are never held
    in memory all at once.
    """
    block_size = 1024 * 1024
    digest = hashlib.sha256()
    with path.open('rb') as stream:
        while block := stream.read(block_size):
            digest.update(block)
    return digest.hexdigest()


def sanitize_filename(name: str) -> str:
    """Make *name* usable as a single file or folder name.

    Runs of the characters ``\\ / : * ? " < > |`` and runs of whitespace are
    each collapsed to one underscore, then leading/trailing dots and
    underscores are removed. If nothing is left, ``'unnamed'`` is returned.
    """
    without_reserved = re.sub(r'[\\/:*?"<>|]+', '_', name)
    without_spaces = re.sub(r'\s+', '_', without_reserved)
    trimmed = without_spaces.strip('._')
    if not trimmed:
        return 'unnamed'
    return trimmed


def iter_files(target_dir: Path, recursive: bool = True, exts: set[str] | None = None):
    walker = target_dir.rglob('*') if recursive else target_dir.glob('*')
    for p in walker:
        if not p.is_file():
            continue
        if exts and p.suffix.lower() not in exts:
            continue
        yield p


def vt_headers(api_key: str):
    """Build the HTTP headers sent with every VirusTotal request.

    Surrounding whitespace is stripped from *api_key* before it is placed in
    the ``x-apikey`` header.
    """
    cleaned_key = api_key.strip()
    headers = {'x-apikey': cleaned_key, 'accept': 'application/json'}
    return headers


def get_upload_url(session: requests.Session, api_key: str, use_private: bool = False):
    """Ask VirusTotal for an upload URL and return the response's ``data`` field.

    Args:
        session: HTTP session used for the request.
        api_key: VirusTotal API key.
        use_private: Query PRIVATE_LARGE_UPLOAD_URL instead of
            PUBLIC_LARGE_UPLOAD_URL.

    Raises:
        RuntimeError: The private endpoint answered 404.
        Whatever ``raise_for_status()`` raises on any other HTTP error status.
    """
    request_headers = vt_headers(api_key)
    endpoint = PUBLIC_LARGE_UPLOAD_URL
    if use_private:
        endpoint = PRIVATE_LARGE_UPLOAD_URL

    response = session.get(endpoint, headers=request_headers, timeout=60)
    private_endpoint_missing = use_private and response.status_code == 404
    if private_endpoint_missing:
        raise RuntimeError('Private Scanning upload_url endpoint를 사용할 수 없습니다. 라이선스를 확인하세요.')
    response.raise_for_status()

    payload = response.json()
    return payload.get('data')


def upload_file(session: requests.Session, api_key: str, file_path: Path, use_private: bool = False):
    """Upload *file_path* to VirusTotal and return the decoded JSON response.

    Files of at most 32 MiB are posted straight to PUBLIC_UPLOAD_URL; anything
    larger is posted to the URL returned by get_upload_url().

    NOTE(review): use_private is only forwarded to get_upload_url(), so it has
    no effect on files at or below the 32 MiB threshold - confirm this is
    intended.

    Raises whatever ``raise_for_status()`` raises on an HTTP error status.
    """
    request_headers = vt_headers(api_key)
    direct_upload_limit = 32 * 1024 * 1024
    if file_path.stat().st_size > direct_upload_limit:
        destination = get_upload_url(session, api_key, use_private=use_private)
    else:
        destination = PUBLIC_UPLOAD_URL

    # The file handle must stay open for the whole POST; the response object
    # is still usable after the handle is closed.
    with file_path.open('rb') as handle:
        response = session.post(
            destination,
            headers=request_headers,
            files={'file': (file_path.name, handle)},
            timeout=300,
        )
    response.raise_for_status()
    return response.json()


def get_analysis(session: requests.Session, api_key: str, analysis_id: str):
    """Fetch the analysis object *analysis_id* and return its decoded JSON.

    Raises whatever ``raise_for_status()`` raises on an HTTP error status.
    """
    request_headers = vt_headers(api_key)
    endpoint = f'{VT_API_BASE}/analyses/{analysis_id}'
    response = session.get(endpoint, headers=request_headers, timeout=60)
    response.raise_for_status()
    payload = response.json()
    return payload


def wait_for_analysis(session: requests.Session, api_key: str, analysis_id: str, poll_interval: int = 15, max_wait: int = 900):
    """Poll an analysis until it is ``completed`` or *max_wait* seconds elapse.

    The analysis is always fetched at least once, so the function never
    returns ``None`` - even when *max_wait* is zero or negative.

    Args:
        session: HTTP session used for the requests.
        api_key: VirusTotal API key.
        analysis_id: Identifier of the analysis to poll.
        poll_interval: Seconds to wait between two polls.
        max_wait: Overall time budget in seconds.

    Returns:
        The most recently fetched analysis JSON. Its status is ``completed``
        unless the time budget ran out first, so callers must check it.

    Raises:
        Whatever get_analysis() raises for a failed request.
    """
    # monotonic() is immune to wall-clock adjustments while waiting.
    deadline = time.monotonic() + max_wait
    while True:
        data = get_analysis(session, api_key, analysis_id)
        status = data.get('data', {}).get('attributes', {}).get('status')
        if status == 'completed':
            return data

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return data
        # Never sleep past the deadline; the poll after this sleep is the
        # last chance to observe completion. A negative interval is treated
        # as "no delay" instead of making time.sleep() raise.
        time.sleep(max(0, min(poll_interval, remaining)))



def try_get_file_report(session: requests.Session, api_key: str, sha256: str):
    """Return the file report for *sha256*, or ``None`` if the API answers 404.

    Any other HTTP error status is raised by ``raise_for_status()``.
    """
    endpoint = f'{VT_API_BASE}/files/{sha256}'
    response = session.get(endpoint, headers=vt_headers(api_key), timeout=60)
    if response.status_code != 404:
        response.raise_for_status()
        return response.json()
    return None


def request_reanalyze(session: requests.Session, api_key: str, sha256: str):
    """POST to the ``analyse`` endpoint of the file *sha256* and return the JSON reply.

    Raises whatever ``raise_for_status()`` raises on an HTTP error status.
    """
    endpoint = f'{VT_API_BASE}/files/{sha256}/analyse'
    response = session.post(endpoint, headers=vt_headers(api_key), timeout=60)
    response.raise_for_status()
    payload = response.json()
    return payload

def get_file_report(session: requests.Session, api_key: str, sha256: str):
    """Return the file report for *sha256* as decoded JSON.

    Unlike try_get_file_report(), a 404 is not special-cased here: every HTTP
    error status is raised by ``raise_for_status()``.
    """
    endpoint = f'{VT_API_BASE}/files/{sha256}'
    response = session.get(endpoint, headers=vt_headers(api_key), timeout=60)
    response.raise_for_status()
    payload = response.json()
    return payload


def build_txt_content(file_name: str, sha256: str, vt_url: str, stats: dict, engine_lines: list[str]):
    """Render the plain-text report for one file.

    The report is a list of ``key=value`` lines (file name, hash, URL and the
    per-category counters from *stats*, each defaulting to 0), a blank line,
    and a ``[malicious_or_suspicious_engines]`` section holding *engine_lines*
    or the single word ``none`` when there are no such lines.
    """
    # (label written to the report, key looked up in *stats*)
    counters = (
        ('malicious', 'malicious'),
        ('suspicious', 'suspicious'),
        ('undetected', 'undetected'),
        ('harmless', 'harmless'),
        ('timeout', 'timeout'),
        ('type_unsupported', 'type-unsupported'),
    )

    report = [
        f'file_name={file_name}',
        f'sha256={sha256}',
        f'virustotal_url={vt_url}',
    ]
    report += [f'{label}={stats.get(key, 0)}' for label, key in counters]
    report += ['', '[malicious_or_suspicious_engines]']
    report.extend(engine_lines or ['none'])
    return '\n'.join(report)


def build_html_content(file_name: str, sha256: str, vt_url: str, stats: dict, engines: list[dict], raw_json: dict):
    """Render the stand-alone HTML report for one file.

    Every value that originates outside this module - the file name, the hash,
    the URL, the engine fields and the raw JSON dump - is HTML-escaped before
    being inserted, so a crafted file name or engine label cannot inject
    markup or script into the saved report.

    Args:
        file_name: Name of the scanned file, shown in the title and header.
        sha256: Hash shown in the header.
        vt_url: Target of the "open in VirusTotal" link.
        stats: Per-category counters; missing categories are shown as 0.
        engines: One dict per listed engine with the keys ``engine_name``,
            ``category``, ``result``, ``method`` and ``engine_update``.
        raw_json: Object dumped verbatim (pretty-printed) at the bottom.

    Returns:
        The complete HTML document as a string.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    def esc(value) -> str:
        """Escape *value* for use in HTML text or a double-quoted attribute."""
        return escape(str(value), quote=True)

    if stats.get('malicious', 0) > 0:
        badge_color = '#d73a49'
    elif stats.get('suspicious', 0) > 0:
        badge_color = '#fb8500'
    else:
        badge_color = '#2a9d8f'

    columns = ('engine_name', 'category', 'result', 'method', 'engine_update')
    engine_rows = [
        '<tr>' + ''.join(f'<td>{esc(item.get(column, ""))}</td>' for column in columns) + '</tr>'
        for item in engines
    ]
    if not engine_rows:
        engine_rows.append('<tr><td colspan="5">malicious/suspicious 결과 없음</td></tr>')
    rows_html = ''.join(engine_rows)

    safe_name = esc(file_name)
    safe_sha256 = esc(sha256)
    safe_url = esc(vt_url)
    malicious = esc(stats.get('malicious', 0))
    suspicious = esc(stats.get('suspicious', 0))
    undetected = esc(stats.get('undetected', 0))
    harmless = esc(stats.get('harmless', 0))
    # The dump goes inside <pre>, so only &, < and > need escaping there.
    safe_json = escape(json.dumps(raw_json, indent=2, ensure_ascii=False), quote=False)

    document = f'''<!DOCTYPE html>
<html lang="ko">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>VirusTotal Result - {safe_name}</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 24px; background:#f5f7fb; color:#1f2937; }}
.wrap {{ max-width: 1200px; margin: 0 auto; }}
.card {{ background:#fff; border-radius:12px; padding:20px; box-shadow:0 4px 20px rgba(0,0,0,.08); margin-bottom:18px; }}
h1,h2 {{ margin:0 0 12px 0; }}
.meta {{ line-height:1.8; }}
.badge {{ display:inline-block; padding:6px 10px; border-radius:999px; color:#fff; background:{badge_color}; font-weight:700; }}
table {{ width:100%; border-collapse:collapse; }}
th, td {{ border:1px solid #dbe2ea; padding:10px; text-align:left; font-size:14px; }}
th {{ background:#eef3f8; }}
pre {{ background:#0f172a; color:#e2e8f0; padding:16px; border-radius:10px; overflow:auto; font-size:12px; }}
a.button {{ display:inline-block; padding:10px 14px; border-radius:8px; background:#1d4ed8; color:#fff; text-decoration:none; }}
.stats {{ display:grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap:12px; }}
.stat {{ background:#f8fafc; border:1px solid #e5e7eb; border-radius:10px; padding:14px; }}
.label {{ color:#64748b; font-size:13px; }}
.value {{ font-size:24px; font-weight:700; margin-top:6px; }}
</style>
</head>
<body>
<div class="wrap">
  <div class="card">
    <h1>VirusTotal 업로드 결과</h1>
    <div class="meta">
      <div><strong>파일명:</strong> {safe_name}</div>
      <div><strong>SHA-256:</strong> <code>{safe_sha256}</code></div>
      <div><strong>바로가기:</strong> <a class="button" href="{safe_url}" target="_blank" rel="noopener noreferrer">VirusTotal 페이지 열기</a></div>
    </div>
  </div>
  <div class="card">
    <h2>요약</h2>
    <div class="stats">
      <div class="stat"><div class="label">Malicious</div><div class="value">{malicious}</div></div>
      <div class="stat"><div class="label">Suspicious</div><div class="value">{suspicious}</div></div>
      <div class="stat"><div class="label">Undetected</div><div class="value">{undetected}</div></div>
      <div class="stat"><div class="label">Harmless</div><div class="value">{harmless}</div></div>
    </div>
    <p style="margin-top:14px;"><span class="badge">검출 상태 확인</span></p>
  </div>
  <div class="card">
    <h2>탐지 엔진</h2>
    <table>
      <thead><tr><th>Engine</th><th>Category</th><th>Result</th><th>Method</th><th>Update</th></tr></thead>
      <tbody>{rows_html}</tbody>
    </table>
  </div>
  <div class="card">
    <h2>Raw JSON</h2>
    <pre>{safe_json}</pre>
  </div>
</div>
</body>
</html>'''
    return document


def extract_engine_results(report_json: dict):
    """Pull the summary counters and the flagged engines out of a file report.

    Only engines whose category is ``malicious`` or ``suspicious`` are kept.

    Returns:
        A ``(stats, engines, engine_lines)`` tuple: the ``last_analysis_stats``
        dict, one dict per kept engine, and the same engines rendered as
        ``name | category | result | method | update`` strings. Missing parts
        of the report yield empty containers / empty strings.
    """
    fields = ('engine_name', 'category', 'result', 'method', 'engine_update')
    flagged = ('malicious', 'suspicious')

    attributes = report_json.get('data', {}).get('attributes', {})
    stats = attributes.get('last_analysis_stats', {})
    per_engine = attributes.get('last_analysis_results', {})

    engines = []
    engine_lines = []
    for verdict in per_engine.values():
        if verdict.get('category', '') not in flagged:
            continue
        entry = {field: verdict.get(field, '') for field in fields}
        engines.append(entry)
        engine_lines.append(' | '.join(str(entry[field]) for field in fields))
    return stats, engines, engine_lines


def process_file(session: requests.Session, api_key: str, src_file: Path, save_root: Path, poll_interval: int, max_wait: int, use_private: bool = False):
    """Obtain the VirusTotal report for *src_file* and save it as TXT and HTML.

    An existing report is looked up by SHA-256 first; only when none exists is
    the file uploaded and its analysis awaited. The two report files are
    written to ``save_root/<parent folder name>/<file stem>.txt|.html``.

    Args:
        session: HTTP session shared by all requests.
        api_key: VirusTotal API key.
        src_file: File to scan.
        save_root: Root directory for the generated reports.
        poll_interval: Seconds between analysis polls.
        max_wait: Maximum seconds to wait for the analysis.
        use_private: Forwarded to upload_file().

    Returns:
        A dict with ``file_name``, ``sha256``, ``txt_path``, ``html_path``,
        ``vt_url``, ``malicious`` and ``suspicious``.

    Raises:
        RuntimeError: The upload returned no analysis id, or the analysis did
            not reach ``completed`` in time.
        Whatever the request helpers raise for failed HTTP calls.
    """
    sha256 = sha256_file(src_file)
    vt_url = f'https://www.virustotal.com/gui/file/{sha256}'

    report_json = try_get_file_report(session, api_key, sha256)

    if report_json is None:
        upload_resp = upload_file(session, api_key, src_file, use_private=use_private)
        analysis_id = upload_resp.get('data', {}).get('id')
        if not analysis_id:
            raise RuntimeError('분석 ID를 받지 못했습니다.')

        analysis_json = wait_for_analysis(session, api_key, analysis_id, poll_interval=poll_interval, max_wait=max_wait)
        # Guard against a missing analysis payload (e.g. no poll happened
        # before the deadline) so the failure surfaces as the RuntimeError
        # below rather than as an AttributeError on None.
        status = (analysis_json or {}).get('data', {}).get('attributes', {}).get('status')
        if status != 'completed':
            raise RuntimeError(f'분석이 완료되지 않았습니다. status={status}')

        report_json = get_file_report(session, api_key, sha256)

    stats, engines, engine_lines = extract_engine_results(report_json)

    # NOTE(review): output names use only the parent folder name and the file
    # stem, so e.g. sample.exe and sample.dll in one folder (or same-named
    # folders in different places) write to the same report files.
    folder_name = sanitize_filename(src_file.parent.name)
    target_folder = save_root / folder_name
    target_folder.mkdir(parents=True, exist_ok=True)

    base_name = sanitize_filename(src_file.stem)
    txt_path = target_folder / f'{base_name}.txt'
    html_path = target_folder / f'{base_name}.html'

    txt_path.write_text(
        build_txt_content(src_file.name, sha256, vt_url, stats, engine_lines),
        encoding='utf-8'
    )
    html_path.write_text(
        build_html_content(src_file.name, sha256, vt_url, stats, engines, report_json),
        encoding='utf-8'
    )

    return {
        'file_name': src_file.name,
        'sha256': sha256,
        'txt_path': str(txt_path),
        'html_path': str(html_path),
        'vt_url': vt_url,
        'malicious': stats.get('malicious', 0),
        'suspicious': stats.get('suspicious', 0),
    }


class App:
    def __init__(self, root):
        self.root = root
        self.root.title('VirusTotal GUI Uploader')
        self.root.geometry('980x720')
        self.log_queue = queue.Queue()

        self.api_key = tk.StringVar()
        self.input_dir = tk.StringVar()
        self.output_dir = tk.StringVar()
        self.ext_filter = tk.StringVar(value='')
        self.recursive = tk.BooleanVar(value=True)
        self.poll_interval = tk.StringVar(value='15')
        self.max_wait = tk.StringVar(value='900')
        self.use_private = tk.BooleanVar(value=False)

        self.build_ui()
        self.root.after(200, self.flush_log)

    def build_ui(self):
        pad = {'padx': 10, 'pady': 8}
        frame = ttk.Frame(self.root)
        frame.pack(fill='both', expand=True, padx=12, pady=12)

        ttk.Label(frame, text='VirusTotal API Key').grid(row=0, column=0, sticky='w', **pad)
        ttk.Entry(frame, textvariable=self.api_key, width=90, show='*').grid(row=0, column=1, columnspan=2, sticky='ew', **pad)

        ttk.Label(frame, text='원본 파일 루트 디렉터리').grid(row=1, column=0, sticky='w', **pad)
        ttk.Entry(frame, textvariable=self.input_dir, width=80).grid(row=1, column=1, sticky='ew', **pad)
        ttk.Button(frame, text='찾기', command=self.pick_input_dir).grid(row=1, column=2, **pad)

        ttk.Label(frame, text='결과 저장 루트 디렉터리').grid(row=2, column=0, sticky='w', **pad)
        ttk.Entry(frame, textvariable=self.output_dir, width=80).grid(row=2, column=1, sticky='ew', **pad)
        ttk.Button(frame, text='찾기', command=self.pick_output_dir).grid(row=2, column=2, **pad)

        ttk.Label(frame, text='확장자 필터 (예: .jsp .jspx .exe .dll)').grid(row=3, column=0, sticky='w', **pad)
        ttk.Entry(frame, textvariable=self.ext_filter, width=80).grid(row=3, column=1, sticky='ew', **pad)

        ttk.Label(frame, text='분석 상태 조회 주기(초)').grid(row=4, column=0, sticky='w', **pad)
        ttk.Entry(frame, textvariable=self.poll_interval, width=12).grid(row=4, column=1, sticky='w', **pad)

        ttk.Label(frame, text='최대 대기 시간(초)').grid(row=5, column=0, sticky='w', **pad)
        ttk.Entry(frame, textvariable=self.max_wait, width=12).grid(row=5, column=1, sticky='w', **pad)

        opt = ttk.Frame(frame)
        opt.grid(row=6, column=1, sticky='w', **pad)
        ttk.Checkbutton(opt, text='하위 폴더 포함', variable=self.recursive).pack(side='left', padx=8)
        ttk.Checkbutton(opt, text='Private Scanning 업로드 URL 사용', variable=self.use_private).pack(side='left', padx=8)

        ttk.Button(frame, text='실행', command=self.start).grid(row=7, column=1, sticky='w', **pad)

        guide = (
            '저장 규칙:\n'
            '- 결과는 출력 루트 아래에 원본 상위 폴더명 기준으로 생성\n'
            '- 예: 출력루트/1.Web/sample.html, sample.txt\n'
            '- TXT에는 file_name만 기록하고 filepath는 기록하지 않음\n'
            '- HTML에는 VT 요약/엔진 결과/Raw JSON 저장\n'
        )
        ttk.Label(frame, text=guide, justify='left').grid(row=8, column=0, columnspan=3, sticky='w', **pad)

        self.log = tk.Text(frame, height=22)
        self.log.grid(row=9, column=0, columnspan=3, sticky='nsew', padx=10, pady=10)

        frame.columnconfigure(1, weight=1)
        frame.rowconfigure(9, weight=1)

    def pick_input_dir(self):
        path = filedialog.askdirectory(title='원본 파일 루트 디렉터리 선택')
        if path:
            self.input_dir.set(path)

    def pick_output_dir(self):
        path = filedialog.askdirectory(title='결과 저장 루트 디렉터리 선택')
        if path:
            self.output_dir.set(path)

    def write_log(self, message: str):
        self.log_queue.put(message)

    def flush_log(self):
        while not self.log_queue.empty():
            msg = self.log_queue.get_nowait()
            self.log.insert('end', msg + '\n')
            self.log.see('end')
        self.root.after(200, self.flush_log)

    def start(self):
        api_key = self.api_key.get().strip()
        input_dir = Path(self.input_dir.get().strip()) if self.input_dir.get().strip() else None
        output_dir = Path(self.output_dir.get().strip()) if self.output_dir.get().strip() else None

        if not api_key:
            messagebox.showerror('오류', 'VirusTotal API Key를 입력하세요.')
            return
        if not input_dir or not input_dir.exists():
            messagebox.showerror('오류', '원본 파일 루트 디렉터리를 확인하세요.')
            return
        if not output_dir:
            messagebox.showerror('오류', '결과 저장 루트 디렉터리를 지정하세요.')
            return

        try:
            poll_interval = int(self.poll_interval.get().strip())
            max_wait = int(self.max_wait.get().strip())
        except ValueError:
            messagebox.showerror('오류', '조회 주기와 최대 대기 시간은 숫자여야 합니다.')
            return

        ext_raw = self.ext_filter.get().strip()
        exts = None
        if ext_raw:
            exts = {e.lower() if e.startswith('.') else f'.{e.lower()}' for e in ext_raw.split()}

        threading.Thread(
            target=self.run_job,
            args=(api_key, input_dir, output_dir, poll_interval, max_wait, exts),
            daemon=True,
        ).start()

    def run_job(self, api_key: str, input_dir: Path, output_dir: Path, poll_interval: int, max_wait: int, exts: set[str] | None):
        summary = []
        session = requests.Session()
        try:
            output_dir.mkdir(parents=True, exist_ok=True)
            files = list(iter_files(input_dir, recursive=self.recursive.get(), exts=exts))
            if not files:
                self.write_log('[!] 처리할 파일이 없습니다.')
                return

            self.write_log(f'[*] 총 {len(files)}개 파일 업로드/분석 시작')
            for i, file_path in enumerate(files, 1):
                try:
                    result = process_file(
                        session,
                        api_key,
                        file_path,
                        output_dir,
                        poll_interval,
                        max_wait,
                        use_private=self.use_private.get(),
                    )
                    msg = (
                        f"[{i}/{len(files)}] OK | {result['file_name']} | "
                        f"malicious={result['malicious']} suspicious={result['suspicious']} | {result['html_path']}"
                    )
                    self.write_log(msg)
                    summary.append('\t'.join([
                        result['file_name'],
                        result['sha256'],
                        str(result['malicious']),
                        str(result['suspicious']),
                        result['html_path'],
                        result['txt_path'],
                        result['vt_url'],
                    ]))
                except Exception as e:
                    msg = f'[{i}/{len(files)}] ERR | {file_path.name} | {e}'
                    self.write_log(msg)
                    summary.append(msg)
        except Exception as e:
            self.write_log(f'[FATAL] {e}')
        finally:
            summary_path = output_dir / 'summary.txt'
            summary_path.write_text('\n'.join(summary), encoding='utf-8')
            self.write_log(f'[*] summary 저장 완료: {summary_path}')
            self.write_log('[*] 작업 종료')
            session.close()


def main():
    """Create the Tk root window, attach the App to it and run the event loop."""
    window = tk.Tk()
    App(window)
    window.mainloop()


# Start the GUI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
