commit 28b42492e6716e76e14f7c044a1eab6d206e1bda Author: Namonay Date: Tue Jun 9 17:46:46 2026 +0200 initial commit diff --git a/data/https:__vavaas.dev_static_img_companion.png b/data/https:__vavaas.dev_static_img_companion.png new file mode 100644 index 0000000..4d545bd Binary files /dev/null and b/data/https:__vavaas.dev_static_img_companion.png differ diff --git a/data/https:__vavaas.dev_static_img_htb.jpg b/data/https:__vavaas.dev_static_img_htb.jpg new file mode 100644 index 0000000..edc8ac4 Binary files /dev/null and b/data/https:__vavaas.dev_static_img_htb.jpg differ diff --git a/data/https:__vavaas.dev_static_img_kfs.png b/data/https:__vavaas.dev_static_img_kfs.png new file mode 100644 index 0000000..31a9440 Binary files /dev/null and b/data/https:__vavaas.dev_static_img_kfs.png differ diff --git a/data/https:__vavaas.dev_static_img_monster.png b/data/https:__vavaas.dev_static_img_monster.png new file mode 100644 index 0000000..dce0c61 Binary files /dev/null and b/data/https:__vavaas.dev_static_img_monster.png differ diff --git a/data/https:__vavaas.dev_static_img_override.png b/data/https:__vavaas.dev_static_img_override.png new file mode 100644 index 0000000..002261b Binary files /dev/null and b/data/https:__vavaas.dev_static_img_override.png differ diff --git a/data/https:__vavaas.dev_static_img_writeups_browsed.png b/data/https:__vavaas.dev_static_img_writeups_browsed.png new file mode 100644 index 0000000..3f40c75 Binary files /dev/null and b/data/https:__vavaas.dev_static_img_writeups_browsed.png differ diff --git a/data/https:__vavaas.dev_static_img_writeups_cobblestone.png b/data/https:__vavaas.dev_static_img_writeups_cobblestone.png new file mode 100644 index 0000000..02f621c Binary files /dev/null and b/data/https:__vavaas.dev_static_img_writeups_cobblestone.png differ diff --git a/data/https:__vavaas.dev_static_img_writeups_codeparttwo.png b/data/https:__vavaas.dev_static_img_writeups_codeparttwo.png new file mode 100644 index 0000000..c2c06bc Binary files /dev/null and b/data/https:__vavaas.dev_static_img_writeups_codeparttwo.png differ diff --git a/data/https:__vavaas.dev_static_img_writeups_conversor.png b/data/https:__vavaas.dev_static_img_writeups_conversor.png new file mode 100644 index 0000000..b4826fa Binary files /dev/null and b/data/https:__vavaas.dev_static_img_writeups_conversor.png differ diff --git a/data/https:__vavaas.dev_static_img_writeups_editor.png b/data/https:__vavaas.dev_static_img_writeups_editor.png new file mode 100644 index 0000000..28784d6 Binary files /dev/null and b/data/https:__vavaas.dev_static_img_writeups_editor.png differ diff --git a/data/https:__vavaas.dev_static_img_writeups_expressway.png b/data/https:__vavaas.dev_static_img_writeups_expressway.png new file mode 100644 index 0000000..976c1db Binary files /dev/null and b/data/https:__vavaas.dev_static_img_writeups_expressway.png differ diff --git a/data/https:__vavaas.dev_static_img_writeups_facts.png b/data/https:__vavaas.dev_static_img_writeups_facts.png new file mode 100644 index 0000000..d76be75 Binary files /dev/null and b/data/https:__vavaas.dev_static_img_writeups_facts.png differ diff --git a/data/https:__vavaas.dev_static_img_writeups_previous.png b/data/https:__vavaas.dev_static_img_writeups_previous.png new file mode 100644 index 0000000..b3de82d Binary files /dev/null and b/data/https:__vavaas.dev_static_img_writeups_previous.png differ diff --git a/data/https:__vavaas.dev_static_img_writeups_pterodactyl.png b/data/https:__vavaas.dev_static_img_writeups_pterodactyl.png new file mode 100644 index 0000000..e175974 Binary files /dev/null and b/data/https:__vavaas.dev_static_img_writeups_pterodactyl.png differ diff --git a/ian-logo-240x240.png b/ian-logo-240x240.png new file mode 100644 index 0000000..4b89607 Binary files /dev/null and b/ian-logo-240x240.png differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..cd6b55d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +requests +argparse \ No newline at end of file diff --git a/scorpion.py b/scorpion.py new file mode 100644 index 0000000..e8ece33 --- /dev/null +++ b/scorpion.py @@ -0,0 +1,63 @@ +import sys +import re + + +class Scorpion: + def __init__(self, args): + self.args = self.args_init(args[1:]) + self.signatures = { + "jpg": [b"\xFF\xD8\xFF"], + "png": [b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A"], + "gif": [b"GIF87a",b"GIF89a"], + "bmp": [b"BM"] + } + self.png_cluster = { + "IHDR": {"Width": 4, "Height": 4, "Bit Depth": 1, "Color Type": 1, "Compression": 1, "Filter": 1, "Interlace": 1} + } + def args_init(self, args: list[str]) -> set[str]: + argset = set() + pattern = re.compile(r'^.*\.(?:jpe?g|png|gif|bmp)$', re.IGNORECASE) + for arg in args: + print(arg) + if not pattern.match(arg): + print("Error : Invalid file") + return + argset.add(arg) + return argset + + def hexdump(self, data: bytes, width: int = 16): + for i in range(0, len(data), width): + chunk = data[i:i+width] + hex_bytes = " ".join(f"{b:02X}" for b in chunk) + ascii_part = "".join(chr(b) if 32 <= b < 127 else "." for b in chunk) + print(f"{i:08X} {hex_bytes:<{width*3}} {ascii_part}") + + def get_file_type(self, data: bytes): + for filetype, sigs in self.signatures.items(): + if any(data.startswith(sig) for sig in sigs): + return filetype + return None + + def run(self): + for arg in self.args: + with open(arg, "rb") as f: + data = f.read() + filetype = self.get_file_type(data) + self.hexdump(data) + match filetype: + case "jpg": + return None + case "png": + self.read_png(data) + case "gif": + return None + case "bmp": + return None + case _: + return None + def read_png(self, data): + for cluster, cluster_values in self.png_cluster.values(): + +if __name__ == "__main__": + scorpion = Scorpion(sys.argv) + scorpion.run() \ No newline at end of file diff --git a/spider.py b/spider.py new file mode 100644 index 0000000..ec99302 --- /dev/null +++ b/spider.py @@ -0,0 +1,130 @@ +import sys +import requests +import argparse +import re +import tqdm +from pathlib import Path + +class Spider: + parser = argparse.ArgumentParser( + description="Spider : An image extraction script", + ) + def __init__(self, target: str, recursive: bool, level: int, path: str): + self.target = target + self.recursive = recursive + self.level = level if level else 5 + self.path = path if path else "./data" + self.href_reg = re.compile(r'href=["\'](.*?)["\']', re.IGNORECASE) + self.img_reg = re.compile(r'src=["\'](.*?\.(?:jpg|jpeg|png|gif|bmp))["\']', re.IGNORECASE) + self.visited = set() + self.imgs = set() + + def run(self): + self.get_page([self.target], self.level if self.recursive else 1) + + if len(self.visited) == 0: + print("Error: couldn't reach target :", self.target) + return + for url in self.visited: + self.get_img(url) + + for img in self.imgs: + self.download_image(img) + + def is_local(self, href): + if not href: + return False + href = href.strip() + if (href in ("", "/", "#")): + return False + if (href.startswith(("http://", "https://", "//", "mailto:", "javascript:")) and not self.target in href): + return False + if (href.endswith(".css")): + return False + return True + + def get_page(self, targets, depth): + if depth == 0 or not targets: + return + + for target in targets: + if target in self.visited: + continue + try: + r = requests.get(target) + except: + continue + if (r.status_code != 200): + continue + self.visited.add(target) + links = self.href_reg.findall(r.text) + next_links = [] + + for l in links: + if (self.is_local(l) and self.target in l): + next_links.append(l) + elif (self.is_local(l)): + next_links.append(self.target + l) + + self.get_page(next_links, depth - 1) + + def get_img(self, url): + try: + r = requests.get(url) + except: + return None + + imgs = self.img_reg.findall(r.text) + + for img in imgs: + if (self.is_local(img) and self.target in img): + self.imgs.add(img) + elif (self.is_local(img)): + self.imgs.add(self.target + img) + + def download_image(self, img): + try: + r = requests.get(img, stream = True) + except: + print("Failed to fetch : " + img) + return + + folder = Path(self.path) + folder.mkdir(parents=True, exist_ok=True) + + file_path = folder / img.replace("/", "_") + + if file_path.exists(): + print("File :" + str(file_path) + " already exists, skipping...") + return + try: + with file_path.open("wb") as handle: + for data in tqdm.tqdm( + r.iter_content(chunk_size=1024), + unit="kB", + total= int(r.headers.get("Content-Length")) // 1024, + desc="Downloading : " + img, + ncols=80 + ): + handle.write(data) + except: + print("Couldn't write to : " + str(file_path)) + return + +def main(): + parser = argparse.ArgumentParser( + description="Spider : An image extraction script", + ) + parser.add_argument("-r", "--recursive", action="store_true", help="Recursively download the image in an URL received as a parameter") + parser.add_argument("-l", "--level", type=int, help="Recursion level (default:5)") + parser.add_argument("-p", "--path", type=str, help="Output folder") + parser.add_argument("url", help="Target URL") + args = parser.parse_args() + if args.level and not args.recursive: + parser.error("-l requires -r") + spider = Spider(args.url, args.recursive, args.level, args.path) + spider.run() + + +if __name__ == "__main__": + main() \ No newline at end of file