bisc/Main.hs

492 lines
16 KiB
Haskell
Raw Permalink Normal View History

2021-05-10 17:12:44 +02:00
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedLabels #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
2022-01-02 01:31:35 +01:00
{-# LANGUAGE MultiWayIf #-}
2018-09-21 21:34:31 +02:00
2021-09-06 00:02:14 +02:00
-- Databases
2022-01-02 02:34:27 +01:00
import Database.Selda (Text, liftIO, (!))
import Database.Selda.SQLite (withSQLite)
import Control.Monad.Trans.Resource (ResourceT)
2021-03-23 16:09:03 +01:00
import qualified Database.Selda as S
import qualified Database.LevelDB as L
import qualified Database.LevelDB.Streaming as LS
2021-09-06 00:02:14 +02:00
-- Error handling
2021-09-07 14:41:56 +02:00
import Control.Exception as BE
import Control.Monad.Catch as CE
import qualified System.Exit as E
2021-09-06 00:02:14 +02:00
-- Configuration
import qualified Options.Applicative as O
import qualified System.Directory as D
import qualified Data.Configurator as C
2021-05-10 17:12:44 +02:00
2021-09-06 00:02:14 +02:00
-- Text converion
import Data.Text.Encoding (decodeUtf8)
2021-03-23 16:09:03 +01:00
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.ByteString as B
2019-03-25 00:02:38 +01:00
2021-09-07 14:41:56 +02:00
-- Version information
import qualified Paths_bisc as Bisc
import Data.Version (showVersion)
2022-01-02 01:31:35 +01:00
-- File locking bypass
import qualified System.Posix.Files as Posix
2021-09-06 00:02:14 +02:00
-- Misc
2022-01-02 01:31:35 +01:00
import Data.List (nub, isInfixOf)
2021-09-06 00:02:14 +02:00
import Data.Maybe (mapMaybe)
import Data.Function ((&))
import Data.Default (def)
import Control.Monad (when)
import Control.Monad.Reader (ReaderT, runReaderT, asks)
import Control.Monad.Except (ExceptT, runExceptT, throwError)
import System.FilePath (joinPath, takeBaseName, (</>))
2019-03-25 00:02:38 +01:00
2021-09-06 00:02:14 +02:00
-- Options
-- | Configuration file settings
2019-03-25 00:02:38 +01:00
data Settings = Settings
2021-09-06 00:02:14 +02:00
{ webenginePath :: FilePath -- ^ webengine data directory
2021-03-21 19:02:54 +01:00
, whitelist :: [Text] -- ^ whitelisted domains
2021-09-06 00:02:14 +02:00
, options :: Options -- ^ cli options
}
-- | Command line options
data Options = Options
2021-09-07 14:41:56 +02:00
{ version :: Bool -- ^ print version number
, dryRun :: Bool -- ^ don't delete anything
2022-01-02 01:31:35 +01:00
, unsafe :: Bool -- ^ ignore locks
2021-09-06 00:02:14 +02:00
, configPath :: FilePath -- ^ config file path
2019-03-25 00:02:38 +01:00
}
2018-09-21 21:34:31 +02:00
2021-09-06 00:02:14 +02:00
-- | Command line parser
cliParser :: FilePath -> O.ParserInfo Options
cliParser defConfig = O.info (O.helper <*> parser) infos
where
parser = Options
<$> O.switch
2021-09-07 14:41:56 +02:00
( O.long "version"
<> O.short 'v'
<> O.help "Print the version number and exit"
)
<*> O.switch
2021-09-06 00:02:14 +02:00
( O.long "dry-run"
<> O.short 'n'
<> O.help ("Don't actually remove anything, "<>
"just show what would be done")
)
2022-01-02 01:31:35 +01:00
<*> O.switch
( O.long "unsafe"
<> O.short 'u'
<> O.help ("Ignore database locks. " <>
"This will probably corrupt the databases, but " <>
"works while the browser is running.")
)
2021-09-06 00:02:14 +02:00
<*> O.strOption
( O.long "config"
<> O.short 'c'
<> O.value defConfig
<> O.help "Specify a configuration file"
)
infos =
O.fullDesc <>
O.progDesc "A small tool that clears cookies (and more)"
2021-03-21 19:02:54 +01:00
-- SQL records
-- | Just a cookie
2018-09-21 21:34:31 +02:00
data Cookie = Cookie
2021-03-21 19:02:54 +01:00
{ host_key :: Text -- ^ cookie domain
, creation_utc :: Int -- ^ creation date
2021-03-23 16:09:03 +01:00
} deriving (S.Generic, Show)
2021-03-21 19:02:54 +01:00
-- | The origin (domain) of a quota
data QuotaOrigin = QuotaOrigin
{ origin :: Text -- ^ URL
, last_modified_time :: Int -- ^ creation date
2021-03-23 16:09:03 +01:00
} deriving (S.Generic, Show)
2018-09-21 21:34:31 +02:00
2021-03-23 16:09:03 +01:00
instance S.SqlRow Cookie
instance S.SqlRow QuotaOrigin
2021-03-21 19:02:54 +01:00
-- SQL tables
-- | Cookies table
2021-03-23 16:09:03 +01:00
cookies :: S.Table Cookie
cookies = S.table "cookies" []
2021-03-21 19:02:54 +01:00
-- | QuotaManager origins table
2021-03-23 16:09:03 +01:00
quotaOrigins :: S.Table QuotaOrigin
quotaOrigins = S.table "OriginInfoTable" []
2018-09-21 21:34:31 +02:00
2021-09-06 00:02:14 +02:00
2021-03-23 16:09:03 +01:00
-- | Main monad stack
--
-- * 'ReaderT' for accessing settings
-- * 'ExceptT' for custom errors
type Action = ReaderT Settings (ExceptT Text IO)
2018-09-23 13:40:12 +02:00
2021-03-23 16:09:03 +01:00
-- | Number of removed domains, list of domains
type Result = (Int, [Text])
2018-09-21 21:34:31 +02:00
2018-09-23 13:40:12 +02:00
2021-03-23 16:09:03 +01:00
-- * Main
-- | Clears all means of permanent storage
2019-03-25 00:02:38 +01:00
main :: IO ()
main = do
2021-09-06 00:02:14 +02:00
defConfig <- D.getXdgDirectory D.XdgConfig ("bisc" </> "bisc.conf")
opts <- O.execParser (cliParser defConfig)
2021-09-07 14:41:56 +02:00
when (version opts) $ do
putStrLn ("bisc " <> showVersion Bisc.version)
E.exitSuccess
2021-09-06 00:02:14 +02:00
run <- runAction <$> loadSettings opts
2021-09-07 14:42:15 +02:00
numFailures <- sum <$> mapM (uncurry run) actions
if numFailures == 0
then E.exitSuccess
else do
putStrLn ("\nwarning: " <> show numFailures <> " actions have failed")
E.exitWith (E.ExitFailure numFailures)
2021-03-23 16:09:03 +01:00
-- | Runs an 'Action' and pretty-prints the results
2021-09-07 14:42:15 +02:00
runAction :: Settings -> Text -> Action Result -> IO Int
2021-03-23 16:09:03 +01:00
runAction settings name x = do
2021-05-10 17:12:44 +02:00
a <- BE.try $ runExceptT (runReaderT x settings)
2021-03-23 16:09:03 +01:00
case a of
2021-09-07 14:42:15 +02:00
Right (Right res) -> printResult res >> return 0
Right (Left msg) -> printFailed msg >> return 1
Left (err :: BE.IOException) ->
printFailed (T.pack $ BE.displayException err) >> return 1
2021-03-21 19:02:54 +01:00
where
2021-09-07 14:42:15 +02:00
printFailed msg =
T.putStrLn ("- " <> name <> " cleaning failed:\n " <> msg)
2021-03-23 16:09:03 +01:00
printResult (n, bad)
2021-03-21 19:02:54 +01:00
| n > 0 = do
2021-09-06 00:42:09 +02:00
T.putStrLn ("- " <> name <> ": " <> verb <>
" " <> T.pack (show n) <> " entries for:")
2021-03-23 16:09:03 +01:00
T.putStrLn (T.unlines $ map (" * " <>) bad)
2021-05-10 17:12:44 +02:00
| otherwise = T.putStrLn ("- " <> name <> ": nothing to delete")
2021-09-06 00:42:09 +02:00
verb = if (dryRun . options $ settings)
then "would delete"
else "deleted"
2021-03-23 16:09:03 +01:00
2021-03-21 19:02:54 +01:00
2021-03-23 16:09:03 +01:00
-- * Cleaning actions
2021-03-21 19:02:54 +01:00
2021-09-07 14:42:15 +02:00
-- | List of actions and their names
actions :: [(Text, Action Result)]
actions =
[ ("Cookies", deleteCookies)
, ("QuotaManager", deleteQuotaOrigins)
, ("IndexedDB", deleteIndexedDB)
, ("LocalStorage", deleteLocalStorage)
, ("SessionStorage", deleteSessionStorage)
]
2022-01-02 01:31:35 +01:00
2021-03-21 19:02:54 +01:00
-- | Deletes records in the Cookies database
2021-03-23 16:09:03 +01:00
deleteCookies :: Action Result
2021-03-21 19:02:54 +01:00
deleteCookies = do
2022-01-02 01:31:35 +01:00
dir <- asks webenginePath
2021-09-06 00:02:14 +02:00
dry <- asks (dryRun . options)
2022-01-02 01:31:35 +01:00
2022-01-02 02:34:27 +01:00
-- check for database
exists <- liftIO $ D.doesFileExist (dir </> "Cookies")
2021-05-10 17:12:44 +02:00
when (not exists) (throwError "database is missing")
2021-03-23 16:09:03 +01:00
whitelist <- map S.text <$> asks whitelist
2022-01-02 02:34:27 +01:00
withoutLocks "Cookies" $ \database -> do
2022-01-02 01:31:35 +01:00
CE.handle dbErrors $ withSQLite database $ do
bad <- S.query $ do
cookie <- S.select cookies
S.restrict (by whitelist cookie)
return (cookie ! #host_key)
when (not dry) $
S.deleteFrom_ cookies (by whitelist)
return (length bad, nub bad)
2018-09-23 13:40:12 +02:00
where
2021-03-23 16:09:03 +01:00
by set x = S.not_ (x ! #host_key `S.isIn` set)
2018-09-23 13:40:12 +02:00
2021-03-21 19:02:54 +01:00
-- | Deletes records in the QuotaManager API database
2021-03-23 16:09:03 +01:00
deleteQuotaOrigins :: Action Result
2021-03-21 19:02:54 +01:00
deleteQuotaOrigins = do
2022-01-02 01:31:35 +01:00
dir <- asks webenginePath
2021-09-06 00:02:14 +02:00
dry <- asks (dryRun . options)
2022-01-02 01:31:35 +01:00
2022-01-02 02:34:27 +01:00
-- check for database
exists <- liftIO $ D.doesFileExist (dir </> "QuotaManager")
2021-05-10 17:12:44 +02:00
when (not exists) (throwError "database is missing")
2022-01-02 02:34:27 +01:00
whitelist <- map mkPattern <$> asks whitelist
withoutLocks "QuotaManager" $ \database -> do
2022-01-02 01:31:35 +01:00
CE.handle dbErrors $ withSQLite database $ do
bad <- S.query $ do
quota <- S.select quotaOrigins
S.restrict (by whitelist quota)
return (quota ! #origin)
when (not dry) $
S.deleteFrom_ quotaOrigins (by whitelist)
return (length bad, nub bad)
2021-03-21 19:02:54 +01:00
where
-- check if quota is not whitelisted
by whitelist quota = S.not_ (S.true `S.isIn` matches)
where
url = quota ! #origin
matches = do
pattern <- S.selectValues (map S.Only whitelist)
S.restrict (url `S.like` S.the pattern)
return S.true
2021-03-21 19:02:54 +01:00
-- turns domains into patterns to match a url
2022-01-02 02:34:27 +01:00
mkPattern domain = "http%://%" <> domain <> "/"
2021-03-21 19:02:54 +01:00
-- | Deletes per-domain files under the IndexedDB directory
--
-- For example:
--
-- https_example.com_0.indexeddb.leveldb
-- https_www.example.com_0.indexeddb.leveldb
--
2021-03-23 16:09:03 +01:00
deleteIndexedDB :: Action Result
2021-03-21 19:02:54 +01:00
deleteIndexedDB = do
2019-03-25 00:02:38 +01:00
webengine <- asks webenginePath
2021-09-06 00:02:14 +02:00
dry <- asks (dryRun . options)
2021-05-10 17:12:44 +02:00
exists <- liftIO $ D.doesDirectoryExist (webengine </> "IndexedDB")
when (not exists) $ throwError "directory is missing"
entries <- listDirectoryAbs (webengine </> "IndexedDB")
2022-01-02 01:31:35 +01:00
unlisted <- (\domains -> not . (`elem` domains)) <$> asks whitelist
2018-09-23 13:40:12 +02:00
let
badFiles = filterMaybe (fmap unlisted . domain) entries
badDomains = mapMaybe domain badFiles
2021-09-06 00:02:14 +02:00
when (not dry) $
liftIO $ mapM_ D.removePathForcibly badFiles
2018-09-23 13:40:12 +02:00
return (length badFiles, nub badDomains)
where
2021-05-10 17:12:44 +02:00
listDirectoryAbs :: FilePath -> Action [FilePath]
listDirectoryAbs dir = liftIO $ map (dir </>) <$> D.listDirectory dir
2018-09-23 13:40:12 +02:00
maybeToBool :: Maybe Bool -> Bool
maybeToBool Nothing = False
maybeToBool (Just x) = x
filterMaybe :: (a -> Maybe Bool) -> [a] -> [a]
filterMaybe f = filter (maybeToBool . f)
domain :: FilePath -> Maybe Text
domain = extract . url where
extract [] = Nothing
2021-09-06 00:02:14 +02:00
extract (_:[]) = Nothing
extract (_:xs) = Just $ T.unwords (init xs)
2018-09-23 13:40:12 +02:00
url = T.splitOn "_" . T.pack . takeBaseName
2021-03-23 16:09:03 +01:00
-- | Deletes records from the local storage levelDB database
--
-- The schema consists of two (or more) records for each url:
--
-- "META:<url>" which stores metadata
-- "_<url>\NUL\SOH<key>" which stores the actual data
--
-- See https://source.chromium.org/chromium/chromium/src/+/master:components/services/storage/dom_storage/local_storage_impl.cc;l=51
--
deleteLocalStorage :: Action Result
deleteLocalStorage = do
webengine <- asks webenginePath
whitelist <- asks whitelist
let path = webengine </> "Local Storage" </> "leveldb"
2022-01-02 01:31:35 +01:00
dry <- asks (dryRun . options)
unsafe <- asks (unsafe . options)
when (not dry && unsafe) $ liftIO $ do
-- delete and recreate the lock file to bypass POSIX locks
D.removeFile (path </> "LOCK")
T.writeFile (path </> "LOCK") ""
2021-03-24 10:33:05 +01:00
dbIsOk <- liftIO $ D.doesFileExist (path </> "LOCK")
2021-05-10 17:12:44 +02:00
when (not dbIsOk) (throwError "database is missing or corrupted")
2021-03-24 10:33:05 +01:00
2021-05-10 22:06:56 +02:00
version <- withRetryDB path (\db -> L.get db def "VERSION")
2021-05-10 17:12:44 +02:00
when (version /= Just "1") (throwError "database is empty or the schema unsupported")
2021-03-23 16:09:03 +01:00
2022-01-02 01:31:35 +01:00
-- when dry running replace the delete function with a nop
2021-09-06 00:02:14 +02:00
let delete = if dry then (\_ _ _ -> pure ()) else L.delete
withDB path $ \db -> do
badDomains <- L.withIterator db def $ \i ->
LS.keySlice i LS.AllKeys LS.Asc
& LS.filter (\k -> "META:" `B.isPrefixOf ` k
2021-05-10 22:06:56 +02:00
&& (metaDomain k) `notElem` whitelist)
2021-09-06 00:02:14 +02:00
& LS.mapM (\k -> delete db def k >> return (metaDomain k))
& LS.toList
n <- L.withIterator db def $ \i ->
LS.keySlice i LS.AllKeys LS.Asc
& LS.filter (\k -> "_" `B.isPrefixOf` k
&& "\NUL\SOH" `B.isInfixOf` k
&& (recDomain k) `notElem` whitelist)
2021-09-06 00:02:14 +02:00
& LS.mapM (delete db def)
& LS.length
return (n, badDomains)
2021-03-23 16:09:03 +01:00
where
-- extract domains from the keys
domain = snd . T.breakOnEnd "://" . decodeUtf8
metaDomain = domain . B.drop 5
recDomain = domain . head . B.split 0 . B.drop 1
2021-03-23 18:55:27 +01:00
-- | Deletes records from the session storage levelDB database
--
-- The schema consists of a map `url -> id` and records under `id`:
--
-- namespace-<session-uid>-<url> = <id>
-- map-<id>-<key> = <value>
--
-- See https://source.chromium.org/chromium/chromium/src/+/master:components/services/storage/dom_storage/session_storage_metadata.cc;l=21
--
deleteSessionStorage :: Action Result
deleteSessionStorage = do
webengine <- asks webenginePath
whitelist <- asks whitelist
let path = webengine </> "Session Storage"
2022-01-02 01:31:35 +01:00
dry <- asks (dryRun . options)
unsafe <- asks (unsafe . options)
when (not dry && unsafe) $ liftIO $ do
-- delete and recreate the lock file to bypass POSIX locks
D.removeFile (path </> "LOCK")
T.writeFile (path </> "LOCK") ""
2021-03-24 10:33:05 +01:00
dbIsOk <- liftIO $ D.doesFileExist (path </> "LOCK")
2021-05-10 17:12:44 +02:00
when (not dbIsOk) (throwError "database is missing or corrupted")
2021-03-24 10:33:05 +01:00
2021-05-10 22:06:56 +02:00
version <- withRetryDB path (\db -> L.get db def "version")
2021-05-10 17:12:44 +02:00
when (version /= Just "1") (throwError "database is empty or the schema unsupported")
2021-03-23 18:55:27 +01:00
2022-01-02 01:31:35 +01:00
-- when dry running replace the delete function with a nop
2021-09-06 00:02:14 +02:00
let delete = if dry then (\_ _ _ -> pure ()) else L.delete
2021-03-23 18:55:27 +01:00
withDB path $ \db -> do
-- map of id -> isBad
badMap <- L.withIterator db def $ \i ->
LS.keySlice i LS.AllKeys LS.Asc
& LS.filter (B.isPrefixOf "namespace")
& LS.mapM (\k -> (,) <$> L.get db def k <*> pure (isBad whitelist k))
& LS.toList
-- delete the unlisted domains map
badDomains <- L.withIterator db def $ \i ->
LS.keySlice i LS.AllKeys LS.Asc
& LS.filter (B.isPrefixOf "namespace")
& LS.filter (isBad whitelist)
2021-09-06 00:02:14 +02:00
& LS.mapM (\k -> delete db def k >> return (domain k))
2021-03-23 18:55:27 +01:00
& LS.toList
-- and their records
n <- L.withIterator db def $ \i ->
LS.keySlice i LS.AllKeys LS.Asc
& LS.filter (B.isPrefixOf "map-")
& LS.mapM (\k ->
case lookup (originId k) badMap of
2021-09-06 00:02:14 +02:00
Just True -> delete db def k >> return 1
2021-03-23 18:55:27 +01:00
_ -> return 0)
& LS.sum
return (n, nub badDomains)
where
isBad whitelist = not . flip elem whitelist . domain
-- extract domain from keys (47 = length "namespace-<uid>-")
url = decodeUtf8 . B.drop 47
domain = (!! 2). T.splitOn "/" . url
-- extract id from key: drop "map-", take until "-" (ascii 45)
originId = Just . B.takeWhile (/= 45). B.drop 4
2021-03-23 16:09:03 +01:00
-- * Helper functions
-- | Loads a leveldb database and runs a resourceT action
2022-01-02 02:34:27 +01:00
withDB :: FilePath -> (L.DB -> ResourceT IO a) -> Action a
2021-03-23 16:09:03 +01:00
withDB path f = liftIO $ L.runResourceT (L.open path def >>= f)
2022-01-02 01:31:35 +01:00
2021-05-10 22:06:56 +02:00
-- | Like 'withDB' but retry the action after repairing the db
2022-01-02 02:34:27 +01:00
withRetryDB :: FilePath -> (L.DB -> ResourceT IO a) -> Action a
2021-05-10 22:06:56 +02:00
withRetryDB path action = do
res <- CE.try (withDB path action)
case res of
Right b -> return b
Left (e :: BE.IOException) ->
2022-01-02 01:31:35 +01:00
if | "Corruption" `T.isInfixOf` msg -> do
-- try repairing before giving up
liftIO $ L.repair path def
withDB path action
| "unavailable" `T.isInfixOf` msg ->
throwError "database is locked (in use by another process)"
| otherwise ->
throwError ("error opening the database:\n " <> msg)
2021-05-10 22:06:56 +02:00
where msg = T.pack (BE.displayException e)
2022-01-02 01:31:35 +01:00
-- | Bypass SQLite locking mechanism
--
-- SQLite manages concurrent access via POSIX locks: these are tied to a
-- specific file and pid. They can be bypassed by simply creating a hard
-- link (pointing to the same inode), editing the link and then removing it.
2022-01-02 02:34:27 +01:00
withoutLocks :: String -> (FilePath -> Action a) -> Action a
withoutLocks dbName cont = do
2022-01-02 01:31:35 +01:00
dir <- asks webenginePath
2022-01-02 02:34:27 +01:00
unsafe <- asks (unsafe . options)
2022-01-02 01:31:35 +01:00
let
real = dir </> dbName
link = real <> "-bypass"
-- bypass the SQLite POSIX locks with hard links
2022-01-02 02:34:27 +01:00
when unsafe $ liftIO (Posix.createLink real link)
res <- cont (if unsafe then link else real)
2022-01-02 01:31:35 +01:00
-- remove the hard links
2022-01-02 02:34:27 +01:00
when unsafe $ liftIO (Posix.removeLink link)
2022-01-02 01:31:35 +01:00
return res
2021-09-06 00:02:14 +02:00
-- | Loads the config file/cli options
loadSettings :: Options -> IO Settings
loadSettings opts = do
2021-03-21 19:02:54 +01:00
configdir <- D.getXdgDirectory D.XdgConfig "qutebrowser"
datadir <- D.getXdgDirectory D.XdgData "qutebrowser"
2019-03-25 00:02:38 +01:00
let
defaultWhitelist = joinPath [configdir, "whitelists", "cookies"]
defaultWebengine = joinPath [datadir, "webengine"]
2018-09-21 21:34:31 +02:00
2021-09-06 00:02:14 +02:00
config <- C.load [C.Optional (configPath opts)]
2021-03-21 19:02:54 +01:00
whitelist <- C.lookupDefault defaultWhitelist config "whitelist-path"
webengine <- C.lookupDefault defaultWebengine config "webengine-path"
domains <- T.lines <$> T.readFile whitelist
2019-03-25 00:02:38 +01:00
2021-09-06 00:02:14 +02:00
return (Settings webengine domains opts)
2021-05-10 17:12:44 +02:00
2022-01-02 01:31:35 +01:00
2021-05-10 17:12:44 +02:00
-- | Catches any Selda error
dbErrors :: S.SeldaError -> Action a
2022-01-02 01:31:35 +01:00
dbErrors (S.DbError msg) = throwError $ "error opening database: " <> T.pack msg
dbErrors e =
if "ErrorBusy" `isInfixOf` msg
then throwError "database is locked (in use by another process)"
else throwError $ "database operation failed: " <> T.pack msg
where msg = BE.displayException e