#!/usr/local/bin/texlua
kpse.set_program_name("luatex")
-- ctanbib -- export ctan entries to bib format
-- Copyright: Michal Hoftich <michal.h21@gmail.com> (2014-2018)
--
-- This work may be distributed and/or modified under the
-- conditions of the LaTeX Project Public License, either version 1.3
-- of this license or (at your option) any later version.
-- The latest version of this license is in
--   http://www.latex-project.org/lppl.txt
-- and version 1.3 or later is part of all distributions of LaTeX
-- version 2005/12/01 or later.
-- 
-- This work has the LPPL maintenance status `maintained'.
-- 
-- The Current Maintainer of this work is Michal Hoftich

-- Command line handling.
-- `bibtype` selects the entry type of the generated record: "manual" by
-- default, "ctan" when -c/--ctan is given. After this block arg[1] is
-- guaranteed to hold the package name.
local bibtype = "manual"

-- help text, shown on request and whenever the package name is missing
local usage = [[ctanbib - convert ctan package information to bibtex format
Usage:
ctanbib [options] <package name>
Available options:
  -c,--ctan       Use @ctan type instead of @manual
  -h,--help       Print this message
  -v,--version    Print version info
  ]]

if #arg < 1 then
  -- nothing to look up: this is a usage error
  print(usage)
  os.exit(1)
elseif arg[1] == "--help" or arg[1] == "-h" then
  -- explicitly requested help is not a failure
  print(usage)
  os.exit(0)
elseif arg[1] == "--version" or arg[1] == "-v" then
  print "ctanbib version v0.1b"
  os.exit(0)
elseif arg[1] == "--ctan" or arg[1] == "-c" then
  -- drop the option so the package name moves to arg[1]
  table.remove(arg, 1)
  bibtype = "ctan"
  -- the option alone is not enough, a package name must follow; without
  -- this check the script would fail later on a nil package name
  if #arg < 1 then
    print(usage)
    os.exit(1)
  end
end

-- name of the requested package: the first remaining command line argument
local pkgname = arg[1]
-- address of the XML description of that package on CTAN
local url = "https://www.ctan.org/xml/pkg/" .. pkgname

-- change that for different title scheme
local titleformat = "The %s package"

-- Template of the printed entry, one field per line. Each $name placeholder
-- is substituted by the `compile` function; the entry type written here is
-- replaced later when a different bibliography type was requested.
local bibtexformat = table.concat({
  "@manual{$package,",
  "title = {$title},",
  "subtitle = {$subtitle},",
  "author = {$author},",
  "url = {$url},",
  "urldate = {$urldate}, ",
  "date = {$date},",
  "version = {$version}",
  "}",
  "", -- the template ends with a newline
}, "\n")

local dom = require('luaxml-domobject')


-- Download an XML document with wget and parse it into a DOM object.
-- @param url  address of the document
-- @return the object returned by dom.parse, or false when wget could not
--         be started or produced no output
local load_xml = function(url)
  -- The URL ends up in a shell command line, so quote it. Inside double
  -- quotes a POSIX shell still interprets \ " $ and `, hence they are
  -- backslash-escaped. The parentheses drop the match count of gsub.
  local quoted = '"' .. (url:gsub('[\\"$`]', "\\%0")) .. '"'
  local command = io.popen("wget -qO- " .. quoted, "r")
  -- io.popen returns nil when the process cannot be created
  if not command then
    return false
  end

  local info = command:read("*all")
  command:close()

  -- no output at all means that nothing was downloaded
  if not info or #info == 0 then
    return false
  end
  return dom.parse(info)
end

-- Escape characters which are special to (La)TeX, so plain text taken from
-- CTAN can be safely placed in a BibTeX field.
-- @param a  text to escape, nil is handled as an empty string
-- @return the escaped text (a single value)
local bibtex_escape = function(a)
  local text = a or ""
  -- besides $ { } and \ also % & # _ need protection: left alone they
  -- start a comment or raise an error once the entry is typeset
  local escaped = text:gsub("([%$%{%}%\\%%&#_])", function(x)
    -- a backslash cannot be escaped by another backslash in TeX
    if x == "\\" then return "\\textbackslash  " end
    return "\\" .. x
  end)
  -- return the string only, not the number of replacements made by gsub
  return escaped
end

-- Convert a list of author elements to a BibTeX author list.
-- @param a  sequence of elements providing the get_attribute method; the
--           "familyname" and "givenname" attributes are read
-- @return names in the "Family, Given" form joined by " and "; an empty
--         string when no author has a name
local get_authors = function(a)
  local retrieved_authors = {}
  for _, author in ipairs(a) do
    -- collect only the name parts which really contain something
    local parts = {}
    for _, attribute in ipairs({"familyname", "givenname"}) do
      local value = author:get_attribute(attribute)
      if value and value ~= "" then
        parts[#parts + 1] = value
      end
    end
    if #parts == 1 then
      -- a single name is handled as an organization: the braces keep
      -- BibTeX from splitting it to name parts
      retrieved_authors[#retrieved_authors + 1] = "{" .. parts[1] .. "}"
    elseif #parts > 1 then
      retrieved_authors[#retrieved_authors + 1] = table.concat(parts, ", ")
    end
    -- authors without any name are skipped, they would only add an empty
    -- item to the list
  end
  return table.concat(retrieved_authors, " and ")
end

-- Make the entry title from the package name.
-- The text of the first "name" element is used, with the first character
-- uppercased; records without that element fall back to the name given on
-- the command line. The escaped name is inserted to `titleformat`.
-- @param record  parsed package description
-- @return formatted title
local get_title = function(record)
  local node = record:query_selector("name")[1]
  local name
  if not node then
    name = pkgname
  else
    name = node:get_text():gsub("^(.)", function(first)
      return unicode.utf8.upper(first)
    end)
  end
  return string.format(titleformat, bibtex_escape(name))
end


-- Find the address of the package home page.
-- @param record  parsed package description
-- @return value of the href attribute of the first "home" element; the
--         CTAN page of the package when the element or a usable href
--         attribute is missing
local get_url = function(record)
  local home = record:query_selector("home")[1]
  local href = home and home:get_attribute("href")
  -- a "home" element without an address would leave the url field empty,
  -- so it is handled in the same way as a missing element
  if href and href ~= "" then return href end
  return "http://www.ctan.org/pkg/"..pkgname
end

-- Get the package caption, it is used as the entry subtitle.
-- @param record  parsed package description
-- @return escaped text of the first "caption" element, nil when the record
--         has no caption
local get_caption = function(record)
  local node = record:query_selector("caption")[1]
  if not node then
    return nil
  end
  return bibtex_escape(node:get_text())
end

-- Read the version information of the package.
-- @param record  parsed package description
-- @return "number" and "date" attributes of the first "version" element;
--         nothing when the record has no such element
local get_version = function(record)
  local node = record:query_selector("version")[1]
  if not node then
    return
  end
  local number = node:get_attribute("number")
  return number, node:get_attribute("date")
end

-- Get the location used as url in entries of the CTAN type.
-- @param record  parsed package description
-- @return "path" attribute of the first "ctan" element; the result of
--         get_url for records without that element
local ctan_url = function(record)
  local node = record:query_selector("ctan")[1]
  if node then
    -- parentheses: exactly one value is returned
    return (node:get_attribute("path"))
  end
  -- some packages don't contain the CTAN path
  return get_url(record)
end


-- Fill a template with values.
-- Every "$" followed by lowercase letters is a placeholder, it is replaced
-- by the field of the same name from `records`. Placeholders without a
-- value are replaced by an empty string.
-- @param template  string with $name placeholders
-- @param records   table with values, indexed by placeholder names
-- @return filled template, followed by the number of processed
--         placeholders (the results of gsub)
local compile = function(template, records)
  local lookup = function(key)
    local value = records[key]
    if value then
      return value
    end
    return ""
  end
  return template:gsub("$([a-z]+)", lookup)
end

-- Main part: download the package description, collect the fields and
-- print the finished entry to the standard output.
local record = load_xml(url)

if not record then
  -- The message belongs to stderr: stdout is typically redirected to a
  -- .bib file and it must not receive anything but the entry.
  io.stderr:write("Cannot find entry for package " .. pkgname .. "\n")
  os.exit(1)
end

-- values for the placeholders of the entry template
local e = {
  author = get_authors(record:query_selector("authorref")),
  package = pkgname,
  title = get_title(record),
  subtitle = get_caption(record),
  -- the entry is dated by the day when it was generated
  urldate = os.date("%Y-%m-%d"),
}

-- use the CTAN path as url for the CTAN type, home page otherwise
if bibtype == "ctan" then
  e.url = ctan_url(record)
else
  e.url = get_url(record)
end
e.version, e.date = get_version(record)

local result = compile(bibtexformat, e)
-- update the bibliography type: the template always starts with @manual
result = result:gsub("^@manual", "@" .. bibtype)

print(result)
