feat(movie): Finish movie parser

Finish the movie parser; the actual moving of files is not yet implemented.
Series are not yet implemented either.
This commit is contained in:
Andreas Mieke 2023-11-06 23:12:19 +01:00
parent 0ecd7378ce
commit 18882983a2
6 changed files with 459 additions and 1226 deletions

1118
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -8,12 +8,13 @@ edition = "2021"
[dependencies]
clap = { version = "4.4.7", features = ["derive"] }
infer = "0.15.0"
inline_colorization = "0.1.6"
inquire = "0.6.2"
log = "0.4.20"
reqwest = { version = "0.11.22", features = ["json", "blocking"] }
sanitise-file-name = "1.0.0"
serde = { version = "1.0.190", features = ["derive"] }
serde_json = "1.0.108"
stderrlog = "0.5.4"
tmdb_client = "1.6.0"
urlencoding = "2.1.3"
walkdir = "2.4.0"

View file

@ -1,6 +1,8 @@
use std::{path::PathBuf, fs::{self, DirEntry}, error::Error};
use crate::{media::handle_media, config::Config};
use log::trace;
use crate::{movie::{handle_movie_files_and_folders, self, Move}, config::Config};
/*fn is_not_hidden(entry: &DirEntry) -> bool {
entry
@ -20,17 +22,17 @@ pub fn walk_path(path: PathBuf) -> Vec<PathBuf> {
entries
}*/
pub fn search_path(path: PathBuf, cfg: Config) -> Result<(), Box<dyn Error>> {
pub fn search_path(path: PathBuf, cfg: Config) -> Result<Vec<Move>, Box<dyn Error>> {
let entries = fs::read_dir(path)?;
let mut files: Vec<DirEntry> = Vec::new();
let mut dirs: Vec<DirEntry> = Vec::new();
let mut folders: Vec<DirEntry> = Vec::new();
// Put all files and folders in corresponding vectors
for entry in entries {
if let Ok(entry) = entry {
if let Ok(file_type) = entry.file_type() {
if file_type.is_dir() {
dirs.push(entry);
folders.push(entry);
} else if file_type.is_file() {
files.push(entry);
}
@ -38,14 +40,15 @@ pub fn search_path(path: PathBuf, cfg: Config) -> Result<(), Box<dyn Error>> {
}
}
if dirs.len() == 0 {
// No folders present, assuming there are only distinct media files
for file in files {
handle_media(file, cfg.clone());
}
}
folders.sort_by(|a, b| b.metadata().unwrap().len().cmp(&a.metadata().unwrap().len()));
files.sort_by(|a, b| b.metadata().unwrap().len().cmp(&a.metadata().unwrap().len()));
trace!("Sorted Dirs: {:#?}", folders);
trace!("Sorted Files: {:#?}", files);
Ok(())
let mut moves: Vec<movie::Move> = Vec::new();
moves.append(&mut handle_movie_files_and_folders(files, folders, cfg.clone()));
Ok(moves)
}
/*

View file

@ -1,6 +1,6 @@
mod config;
mod directory;
mod media;
mod movie;
use log::*;
use clap::Parser;
@ -21,6 +21,14 @@ struct Args {
#[arg(short, long)]
first_run: bool,
/// Move files rather than copying them
#[arg(short, long, name="move")]
moov: bool,
/// Look for shows instead of movies
#[arg(short, long)]
shows: bool,
/// Custom config file
#[arg(short, long, value_name = "FILE")]
config: Option<PathBuf>,
@ -64,8 +72,14 @@ fn main() {
};
//let files = directory::walk_path(search_path);
directory::search_path(search_path, cfg).unwrap();
let moves = directory::search_path(search_path, cfg).unwrap();
for move_file in moves {
info!("Moving: {:#?}: {:#?}", args.moov, move_file);
_ = move_file.from;
_ = move_file.to;
}
/*for file in files.clone() {
info!("Found: {}", file.to_str().unwrap());
}*/

View file

@ -1,182 +0,0 @@
use std::{path::PathBuf, error::Error, io::Read, fs::{File, DirEntry}, cmp, fmt, ops::Deref};
use infer;
use inquire::Select;
use log::{info, warn, error, trace, debug};
use serde::Deserialize;
use urlencoding::encode;
use crate::config::Config;
/// Body of a TMDB search reply, as deserialized from JSON in `lookup_media`.
#[derive(Deserialize, Debug)]
struct TMDBResponse {
// NOTE(review): presumably the page index of this reply; not read by the code in this file.
page: i32,
/// Candidate entries; offered to the user for selection in `lookup_media`.
results: Vec<TMDBEntry>,
// NOTE(review): presumably the number of available pages; not read by the code in this file.
total_pages: i32,
/// Number of matches reported by the service; `lookup_media` retries with a
/// shorter query while this is `0`.
total_results: i32
}
/// One entry of a TMDB search reply.
///
/// The `alias` attributes let a field be filled from either of two JSON keys
/// (e.g. `title` or `name`).
#[derive(Deserialize, Debug)]
struct TMDBEntry {
/// Identifier of the entry; shown as `ID: ...` by the `Display` impl.
id: i32,
/// Display title; also accepted under the JSON key `name`.
#[serde(alias = "name")]
title: String,
/// Language tag of the entry, if the reply contained one.
original_language: Option<String>,
/// Original title; also accepted under the JSON key `original_name`.
#[serde(alias = "original_name")]
original_title: String,
// NOTE(review): presumably a plot summary; not read by the code in this file.
overview: Option<String>,
/// Kind of entry; the `Display` impl special-cases `"movie"` and `"tv"`.
media_type: String,
// NOTE(review): presumably a ranking score from the service; not read by the code in this file.
popularity: f32,
/// Release date; also accepted under the JSON key `first_air_date`.
#[serde(alias = "first_air_date")]
release_date: Option<String>,
}
/// Human-readable one-line summary used as the label in the selection prompt.
///
/// Movies and shows are rendered as `[MOVIE|SHOW] title (date, language) (ID: id)`;
/// any other media type is rendered as `[type] title (ID: id)`.
impl fmt::Display for TMDBEntry {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self.media_type.as_str() {
            "movie" => "MOVIE",
            "tv" => "SHOW",
            other => return write!(f, "[{}] {} (ID: {})", other, self.title, self.id),
        };
        // Both fields are optional in the reply; fall back to a placeholder
        // instead of panicking when one of them is missing.
        let released = self.release_date.as_deref().unwrap_or("unknown");
        let language = self.original_language.as_deref().unwrap_or("unknown");
        write!(f, "[{}] {} ({}, {}) (ID: {})", label, self.title, released, language, self.id)
    }
}
/// Reads up to the first 8192 bytes of the file at `path`.
///
/// # Errors
/// Returns an error if the file cannot be opened or read.
fn get_file_header(path: PathBuf) -> Result<Vec<u8>, Box<dyn Error>> {
    const HEADER_LEN: u64 = 8192;

    let file = File::open(path)?;
    // Size the buffer from the file length when it is known; otherwise let it grow on demand.
    let capacity = match file.metadata() {
        Ok(meta) => cmp::min(meta.len(), HEADER_LEN) as usize + 1,
        Err(_) => 0,
    };
    let mut header = Vec::with_capacity(capacity);
    file.take(HEADER_LEN).read_to_end(&mut header)?;
    Ok(header)
}
/// Decides whether a file-name token should be kept for the title search.
///
/// Returns `false` for known release/encoding tags (compared ASCII
/// case-insensitively) and for tokens that start with an opening or end with a
/// closing bracket of any kind; returns `true` for everything else.
fn token_valid(t: &&str) -> bool {
    const NOISE_WORDS: &[&str] = &[
        "dvd", "bluray", "webrip", "youtube", "download", "web", "uhd", "hd", "tv", "tvrip",
        "1080p", "1080i", "2160p", "x264", "x265", "h265", "dts", "hevc", "10bit", "12bit",
        "hdr", "xvid", "AAC5", "AAC", "AC3",
        // This only drops the word "sample"; maybe files containing it should be skipped altogether.
        "sample",
    ];

    let token: &str = *t;
    let is_noise_word = NOISE_WORDS
        .iter()
        .any(|word| token.eq_ignore_ascii_case(word));
    let is_bracketed = token.starts_with(&['[', '(', '{'][..])
        || token.ends_with(&[']', ')', '}'][..]);

    !(is_noise_word || is_bracketed)
}
/// Splits a media file name into the tokens used as the title search query.
///
/// The file extension is removed first, then the remainder is split on
/// `-`, space, `:`, `@` and `.`. Empty tokens (produced by consecutive
/// separators such as `" - "`) and tokens rejected by `token_valid` are dropped.
fn tokenize_media_name(file_name: String) -> Vec<String> {
    // Strip the extension before filtering: popping the last token afterwards
    // would remove a title word whenever the name has no extension or the
    // extension token itself was filtered out.
    let stem = std::path::Path::new(&file_name)
        .file_stem()
        .and_then(|stem| stem.to_str())
        .unwrap_or_default();

    let tokens: Vec<String> = stem
        .split(&['-', ' ', ':', '@', '.'][..])
        // Empty tokens only add stray spaces to the query and cause repeated,
        // identical lookups when the query is shortened token by token.
        .filter(|t| !t.is_empty() && token_valid(t))
        .map(String::from)
        .collect();
    trace!("Tokens are: {:#?}", tokens);
    tokens
}
fn lookup_media(file_name: PathBuf, mut name_tokens: Vec<String>, cfg: Config) -> Option<TMDBEntry> {
let mut h = reqwest::header::HeaderMap::new();
h.insert("Accept", reqwest::header::HeaderValue::from_static("application/json"));
h.insert("Authorization", reqwest::header::HeaderValue::from_str(format!("Bearer {}", cfg.tmdb_key).as_str()).unwrap());
let client = reqwest::blocking::Client::builder()
.default_headers(h)
.build().unwrap();
let mut response: TMDBResponse;
loop {
if name_tokens.len() == 0 {
error!("Could not find title on TMDB!");
return None;
}
let name = name_tokens.join(" ");
trace!("Searching on TMDB for {:#?}", name);
let http_response = client
.get(format!("https://api.themoviedb.org/3/search/multi?query={}&include_adult=false&language=en-US&page=1", encode(name.as_str()).into_owned()))
.send().unwrap();
response = http_response.json::<TMDBResponse>().unwrap();
trace!("TMDB Reponse: {:#?}", response);
if response.total_results == 0 {
name_tokens.pop();
} else {
break;
}
}
let options = response.results;
let ans = Select::new(format!("Select movie or show that matches the file \x1b[93m{}\x1b[0m:", file_name.display()).as_str(), options).prompt();
match ans {
Ok(choice) => {
debug!("Selected: {:#?}", choice);
return Some(choice);
},
Err(e) => {
error!("Error while selecting content: {:#?}", e);
return None;
},
}
}
/// Looks up the given video file on TMDB based on its file name.
///
/// Handling of a successful lookup is not implemented yet and hits `todo!`.
fn video_file_handler(entry: DirEntry, cfg: Config) {
    let path = entry.path();
    info!("Found video file: {:#?}", path);

    let os_name = path.file_name().unwrap_or_default();
    trace!("File name is: {:#?}", os_name);

    let printable = os_name.to_str().map(str::to_owned).unwrap_or_default();
    let tokens = tokenize_media_name(printable);

    if lookup_media(path, tokens, cfg).is_some() {
        todo!("Save media info in some struct to move media afterwards, or move directly");
    }
}
pub fn handle_media(entry: DirEntry, cfg: Config) {
if entry.file_type().is_ok_and(|t| t.is_dir()) {
warn!("Directory passed to handle_media, {:#?} will be skipped", entry);
return
}
match get_file_header(entry.path()) {
Ok(header) => {
// Handle video files
if infer::is_video(&header) {
video_file_handler(entry, cfg.clone());
}
},
Err(error) => error!("Can not get file header for {:#?}, Error: {:#?}", entry, error),
}
}

339
src/movie.rs Normal file
View file

@ -0,0 +1,339 @@
use std::{path::PathBuf, error::Error, io::Read, fs::{File, DirEntry}, cmp, fmt};
use infer;
use inquire::{Select, Text, Confirm};
use log::{info, warn, error, trace, debug};
use reqwest::{blocking::Client, header::{HeaderMap, HeaderValue}};
use serde::Deserialize;
use urlencoding::encode;
use inline_colorization::*;
use sanitise_file_name::sanitise;
use walkdir::WalkDir;
use crate::{config::Config, directory::search_path};
/// Body of a TMDB search reply, as deserialized from JSON in `lookup_media`.
#[derive(Deserialize, Debug)]
struct TMDBResponse {
/// Candidate entries; offered to the user for selection in `lookup_media`.
results: Vec<TMDBEntry>,
/// Number of matches reported by the service; `lookup_media` retries with a
/// shorter query while this is `0`.
total_results: i32
}
/// One entry of a TMDB search reply.
///
/// The `alias` attributes let a field be filled from either of two JSON keys
/// (e.g. `title` or `name`).
#[derive(Deserialize, Debug, Clone)]
struct TMDBEntry {
/// Identifier of the entry; used for the `{tmdb-<id>}` part of destination paths.
id: i32,
/// Display title; also accepted under the JSON key `name`. Used (sanitised)
/// for destination folder and file names.
#[serde(alias = "name")]
title: String,
/// Language tag of the entry, if the reply contained one.
original_language: Option<String>,
/// Kind of entry; the `Display` impl special-cases `"movie"` and `"tv"`.
media_type: String,
/// Release date; also accepted under the JSON key `first_air_date`.
#[serde(alias = "first_air_date")]
release_date: Option<String>,
}
/// Human-readable one-line summary used as the label in the selection prompt.
///
/// Movies and shows are rendered as `[MOVIE|SHOW] title (date, language) (ID: id)`;
/// any other media type is rendered as `[type] title (ID: id)`.
impl fmt::Display for TMDBEntry {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self.media_type.as_str() {
            "movie" => "MOVIE",
            "tv" => "SHOW",
            other => return write!(f, "[{}] {} (ID: {})", other, self.title, self.id),
        };
        // Both fields are optional in the reply; fall back to a placeholder
        // instead of panicking when one of them is missing.
        let released = self.release_date.as_deref().unwrap_or("unknown");
        let language = self.original_language.as_deref().unwrap_or("unknown");
        write!(f, "[{}] {} ({}, {}) (ID: {})", label, self.title, released, language, self.id)
    }
}
/// A planned relocation of one file; produced by `check_movie_file`.
#[derive(Debug)]
pub struct Move {
/// Path of the file as it was found while scanning.
pub from: PathBuf,
/// Destination path, built by joining a `Movies/...` relative path onto `cfg.plex_library`.
pub to: PathBuf
}
/// Reads up to the first 8192 bytes of the file at `path`.
///
/// # Errors
/// Returns an error if the file cannot be opened or read.
fn get_file_header(path: PathBuf) -> Result<Vec<u8>, Box<dyn Error>> {
    const HEADER_LEN: u64 = 8192;

    let file = File::open(path)?;
    // Size the buffer from the file length when it is known; otherwise let it grow on demand.
    let capacity = match file.metadata() {
        Ok(meta) => cmp::min(meta.len(), HEADER_LEN) as usize + 1,
        Err(_) => 0,
    };
    let mut header = Vec::with_capacity(capacity);
    file.take(HEADER_LEN).read_to_end(&mut header)?;
    Ok(header)
}
/// Decides whether a file-name token should be kept for the title search.
///
/// Returns `false` for known release/encoding tags (compared ASCII
/// case-insensitively) and for tokens that start with an opening or end with a
/// closing bracket of any kind; returns `true` for everything else.
fn token_valid(t: &&str) -> bool {
    const NOISE_WORDS: &[&str] = &[
        "dvd", "bluray", "webrip", "youtube", "download", "web", "uhd", "hd", "tv", "tvrip",
        "1080p", "1080i", "2160p", "x264", "x265", "h265", "dts", "hevc", "10bit", "12bit",
        "hdr", "xvid", "AAC5", "AAC", "AC3",
        // This only drops the word "sample"; maybe files containing it should be skipped altogether.
        "sample",
    ];

    let token: &str = *t;
    let is_noise_word = NOISE_WORDS
        .iter()
        .any(|word| token.eq_ignore_ascii_case(word));
    let is_bracketed = token.starts_with(&['[', '(', '{'][..])
        || token.ends_with(&[']', ')', '}'][..]);

    !(is_noise_word || is_bracketed)
}
/// Splits a media file name into the tokens used as the title search query.
///
/// The file extension is removed first, then the remainder is split on
/// `-`, space, `:`, `@` and `.`. Empty tokens (produced by consecutive
/// separators such as `" - "`) and tokens rejected by `token_valid` are dropped.
fn tokenize_media_name(file_name: String) -> Vec<String> {
    // Strip the extension before filtering: popping the last token afterwards
    // would remove a title word whenever the name has no extension or the
    // extension token itself was filtered out.
    let stem = std::path::Path::new(&file_name)
        .file_stem()
        .and_then(|stem| stem.to_str())
        .unwrap_or_default();

    let tokens: Vec<String> = stem
        .split(&['-', ' ', ':', '@', '.'][..])
        // Empty tokens only add stray spaces to the query and cause repeated,
        // identical lookups when the query is shortened token by token.
        .filter(|t| !t.is_empty() && token_valid(t))
        .map(String::from)
        .collect();
    trace!("Tokens are: {:#?}", tokens);
    tokens
}
fn lookup_media(file_name: PathBuf, mut name_tokens: Vec<String>, cfg: Config) -> Option<TMDBEntry> {
let mut h = HeaderMap::new();
h.insert("Accept", HeaderValue::from_static("application/json"));
h.insert("Authorization", HeaderValue::from_str(format!("Bearer {}", cfg.tmdb_key).as_str()).unwrap());
let client = Client::builder()
.default_headers(h)
.build().unwrap();
let mut response: TMDBResponse;
loop {
if name_tokens.len() == 0 {
error!("Could not find title on TMDB!");
return None;
}
let name = name_tokens.join(" ");
trace!("Searching on TMDB for {:#?}", name);
let http_response = client
.get(format!("https://api.themoviedb.org/3/search/multi?query={}&include_adult=false&language=en-US&page=1", encode(name.as_str()).into_owned()))
.send().unwrap();
response = http_response.json::<TMDBResponse>().unwrap();
trace!("TMDB Reponse: {:#?}", response);
if response.total_results == 0 {
name_tokens.pop();
} else {
break;
}
}
let options = response.results;
let ans = Select::new(format!("Select movie or show that matches the file {style_bold}{}{style_reset}:", file_name.display()).as_str(), options).prompt();
match ans {
Ok(choice) => {
debug!("Selected: {:#?}", choice);
return Some(choice);
},
Err(e) => {
error!("Error while selecting content: {:#?}", e);
return None;
},
}
}
/// Derives search tokens from the file name of `entry` and looks the title up
/// via `lookup_media`, returning whatever that lookup yields.
fn video_file_handler(entry: PathBuf, cfg: Config) -> Option<TMDBEntry> {
    info!("Found video file: {:#?}", entry);

    let os_name = entry.file_name().unwrap_or_default();
    trace!("File name is: {:#?}", os_name);

    // Names that are not valid UTF-8 are treated like an empty name.
    let printable = os_name.to_str().map(str::to_owned).unwrap_or_default();
    let tokens = tokenize_media_name(printable);

    lookup_media(entry, tokens, cfg)
}
pub fn handle_movie_files_and_folders(files: Vec<DirEntry>, folders: Vec<DirEntry>, cfg: Config) -> Vec<Move> {
let mut moves: Vec<Move> = Vec::new();
let mut primary_media: Option<TMDBEntry> = None; // Assuming first file (biggest file) is primary media, store the information of this, for the rest, do lazy matching for extra content/subs and so on
for file in files {
check_movie_file(file.path(), &mut primary_media, &cfg, &mut moves);
}
match primary_media {
Some(_) => {
// There is already primary media, check directories for more media for same movie
for folder in folders {
for entry in WalkDir::new(folder.path()) {
match entry {
Ok(entry) => {
if entry.file_type().is_file() {
check_movie_file(entry.into_path(), &mut primary_media, &cfg, &mut moves);
}
},
Err(e) => {
error!("Error walking the directory: {:#?}", e);
continue;
}
}
}
}
},
None => {
// There is no primary media yet, try every folder as main folder
for folder in folders {
moves.append(&mut search_path(folder.path(), cfg.clone()).unwrap());
}
}
}
moves
}
fn check_movie_file(file: PathBuf, primary_media: &mut Option<TMDBEntry>, cfg: &Config, moves: &mut Vec<Move>) {
trace!("Checking {:#?}", file);
match get_file_header(file.clone()) {
Ok(header) => {
// Handle video files
if infer::is_video(&header) {
match primary_media.as_ref() {
None => {
// No primary media found yet, look up media on TMDB
match video_file_handler(file.clone(), cfg.clone()) {
Some(meta) => {
*primary_media = Some(meta.clone());
let original_path = file;
let ext = original_path.extension().unwrap_or_default();
let new_path = cfg.plex_library.join(format!("Movies/{0} {{tmdb-{1}}}/{0} {{tmdb-{1}}}.{2}", sanitise(meta.title.as_str()), meta.id, ext.to_str().unwrap_or_default()));
moves.push(Move { from: original_path, to: new_path });
},
None => {
warn!("Could not find a TMDB entry for {:#?}", file);
return;
},
}
},
Some(primary_media) => {
// No additional TMDB lookup needed, treat media as extras
let extra_types: Vec<&str> = vec!["Ignore", "Edition", "Behind The Scenes", "Deleted Scenes", "Featurettes", "Interviews", "Scenes", "Shorts", "Trailers", "Other"];
let ans = Select::new(format!("Select extra type {style_bold}{}{style_reset} (Ignore to ignore the file, Edition to treat it as alternate edition of the main movie):", file.display()).as_str(), extra_types).prompt();
match ans {
Ok(choice) => {
if choice == "Ignore" {
// Ignoring the given file
return;
}
if choice == "Edition" {
// Treat the given file as different edition of main movie
let edition_name = Text::new("Specify the edition's name (e.g. Director's Cut, Theatrical Version):").prompt();
match edition_name {
Ok(edition_name) => {
let original_path = file;
let ext = original_path.extension().unwrap_or_default();
let new_path = cfg.plex_library.join(format!("Movies/{0} {{tmdb-{1}}}/{0} {{tmdb-{1}}} {{edition-{3}}}.{2}", sanitise(primary_media.title.as_str()), primary_media.id, ext.to_str().unwrap_or_default(), edition_name));
moves.push(Move { from: original_path, to: new_path });
return;
},
Err(e) => {
error!("There was an error: {:#?}", e);
return;
},
}
}
let initial_value = file.file_stem().unwrap_or_default().to_str().unwrap_or_default();
let description = Text::new(format!("Give this {} a descriptive name:", choice).as_str()).with_initial_value(initial_value).prompt();
match description {
Ok(description) => {
let original_path = file;
let ext = original_path.extension().unwrap_or_default();
let new_path = cfg.plex_library.join(format!("Movies/{0} {{tmdb-{1}}}/{3}/{4}.{2}", sanitise(primary_media.title.as_str()), primary_media.id, ext.to_str().unwrap_or_default(), choice, description));
moves.push(Move { from: original_path, to: new_path });
return;
},
Err(e) => {
error!("There was an error: {:#?}", e);
return;
},
}
},
Err(e) => {
error!("There was an error: {:#?}", e);
return;
},
}
}
}
} else {
match file.extension() {
Some(ext) => {
if ext.eq_ignore_ascii_case("srt") ||
ext.eq_ignore_ascii_case("ass") ||
ext.eq_ignore_ascii_case("ssa") ||
ext.eq_ignore_ascii_case("smi") ||
ext.eq_ignore_ascii_case("pgs") ||
ext.eq_ignore_ascii_case("vob") {
// Subtitle file
if primary_media.is_none() {
warn!("Can not categorize subtitle file without primary media, skipping.");
return;
}
let lang_code = Text::new(format!("Specify ISO-639-1 (2-letter) language code (e.g. 'en', 'de') or leave empty to discard for {style_bold}{}{style_reset}:", file.display()).as_str()).prompt();
match lang_code {
Ok(lang_code) => {
if lang_code == "" {
return;
}
let forced = Confirm::new("Is this a forced sub?").with_default(false).prompt();
match forced {
Ok(true) => {
// Forced
let original_path = file;
let ext = original_path.extension().unwrap_or_default();
let new_path = cfg.plex_library.join(format!("Movies/{0} {{tmdb-{1}}}/{0} {{tmdb-{1}}}.{3}.forced.{2}", sanitise(primary_media.as_ref().unwrap().title.as_str()), primary_media.as_ref().unwrap().id, ext.to_str().unwrap_or_default(), lang_code.to_ascii_lowercase()));
moves.push(Move { from: original_path, to: new_path });
return;
},
Ok(false) => {
// Non-forced
let original_path = file;
let ext = original_path.extension().unwrap_or_default();
let new_path = cfg.plex_library.join(format!("Movies/{0} {{tmdb-{1}}}/{0} {{tmdb-{1}}}.{3}.{2}", sanitise(primary_media.as_ref().unwrap().title.as_str()), primary_media.as_ref().unwrap().id, ext.to_str().unwrap_or_default(), lang_code.to_ascii_lowercase()));
moves.push(Move { from: original_path, to: new_path });
return;
},
Err(e) => {
error!("There was an error: {:#?}", e);
return;
},
}
},
Err(e) => {
error!("There was an error: {:#?}", e);
return;
},
}
} else {
info!("Not a video file nor subtitle, skipping");
return;
}
},
None => {
error!("File {:#?} has no file extension", file);
return;
}
}
}
},
Err(error) => error!("Can not get file header for {:#?}, Error: {:#?}", file, error),
}
}