summaryrefslogtreecommitdiff
path: root/Annex/FileMatcher.hs
blob: b41a4a421f0c512bb340f89f7c0b37e10ac7b625 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
{- git-annex file matching
 -
 - Copyright 2012-2019 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

{-# LANGUAGE CPP #-}

module Annex.FileMatcher (
	GetFileMatcher,
	checkFileMatcher,
	checkFileMatcher',
	checkMatcher,
	checkMatcher',
	matchAll,
	PreferredContentData(..),
	preferredContentTokens,
	preferredContentKeylessTokens,
	preferredContentParser,
	ParseToken,
	parsedToMatcher,
	mkLargeFilesParser,
	largeFilesMatcher,
) where

import qualified Data.Map as M

import Annex.Common
import Limit
import Utility.Matcher
import Types.Group
import qualified Annex
import Types.FileMatcher
import Git.FilePath
import Types.Remote (RemoteConfig)
import Annex.CheckAttr
import Git.CheckAttr (unspecifiedAttr)
#ifdef WITH_MAGICMIME
import Annex.Magic
#endif

import Data.Either
import qualified Data.Set as S

type GetFileMatcher = FilePath -> Annex (FileMatcher Annex)

checkFileMatcher :: GetFileMatcher -> FilePath -> Annex Bool
checkFileMatcher getmatcher file = checkFileMatcher' getmatcher file (return True)

-- | Allows running an action when no matcher is configured for the file.
checkFileMatcher' :: GetFileMatcher -> FilePath -> Annex Bool -> Annex Bool
checkFileMatcher' getmatcher file notconfigured = do
	matcher <- getmatcher file
	checkMatcher matcher Nothing afile S.empty notconfigured d
  where
	afile = AssociatedFile (Just file)
	-- checkMatcher will never use this, because afile is provided.
	d = return True

checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> AssumeNotPresent -> Annex Bool -> Annex Bool -> Annex Bool
checkMatcher matcher mkey afile notpresent notconfigured d
	| isEmpty matcher = notconfigured
	| otherwise = case (mkey, afile) of
		(_, AssociatedFile (Just file)) -> go =<< fileMatchInfo file
		(Just key, _) -> go (MatchingKey key afile)
		_ -> d
  where
	go mi = checkMatcher' matcher mi notpresent

checkMatcher' :: FileMatcher Annex -> MatchInfo -> AssumeNotPresent -> Annex Bool
checkMatcher' matcher mi notpresent =
	matchMrun matcher $ \a -> a notpresent mi

fileMatchInfo :: FilePath -> Annex MatchInfo
fileMatchInfo file = do
	matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
	return $ MatchingFile FileInfo
		{ matchFile = matchfile
		, currFile = file
		}

matchAll :: FileMatcher Annex
matchAll = generate []

parsedToMatcher :: [ParseResult (MatchFiles Annex)] -> Either String (FileMatcher Annex)
parsedToMatcher parsed = case partitionEithers parsed of
	([], vs) -> Right $ generate vs
	(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es

data ParseToken t
	= SimpleToken String (ParseResult t)
	| ValueToken String (String -> ParseResult t)

type ParseResult t = Either String (Token t)

parseToken :: [ParseToken t] -> String -> ParseResult t
parseToken l t = case syntaxToken t of
	Right st -> Right st
	Left _ -> go l
  where
	go [] = Left $ "near " ++ show t
	go (SimpleToken s r : _) | s == t = r
	go (ValueToken s mkr : _) | s == k = mkr v
	go (_ : ps) = go ps
	(k, v) = separate (== '=') t

{- This is really dumb tokenization; there's no support for quoted values.
 - Open and close parens are always treated as standalone tokens;
 - otherwise tokens must be separated by whitespace. -}
tokenizeMatcher :: String -> [String]
tokenizeMatcher = filter (not . null) . concatMap splitparens . words
  where
	splitparens = segmentDelim (`elem` "()")

commonKeylessTokens :: LimitBy -> [ParseToken (MatchFiles Annex)]
commonKeylessTokens lb =
	[ SimpleToken "anything" (simply limitAnything)
	, SimpleToken "nothing" (simply limitNothing)
	, ValueToken "include" (usev limitInclude)
	, ValueToken "exclude" (usev limitExclude)
	, ValueToken "largerthan" (usev $ limitSize lb (>))
	, ValueToken "smallerthan" (usev $ limitSize lb (<))
	]

commonKeyedTokens :: [ParseToken (MatchFiles Annex)]
commonKeyedTokens = 
	[ SimpleToken "unused" (simply limitUnused)
	]

data PreferredContentData = PCD
	{ matchStandard :: Either String (FileMatcher Annex)
	, matchGroupWanted :: Either String (FileMatcher Annex)
	, getGroupMap :: Annex GroupMap
	, configMap :: M.Map UUID RemoteConfig
	, repoUUID :: Maybe UUID
	}

-- Tokens of preferred content expressions that do not need a Key to be
-- known. 
--
-- When importing from a special remote, this is used to match
-- some preferred content expressions before the content is downloaded,
-- so the Key is not known.
preferredContentKeylessTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
preferredContentKeylessTokens pcd =
	[ SimpleToken "standard" (call $ matchStandard pcd)
	, SimpleToken "groupwanted" (call $ matchGroupWanted pcd)
	, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
	] ++ commonKeylessTokens LimitAnnexFiles
  where
	preferreddir = fromMaybe "public" $
		M.lookup "preferreddir" =<< (`M.lookup` configMap pcd) =<< repoUUID pcd

preferredContentKeyedTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
preferredContentKeyedTokens pcd =
	[ SimpleToken "present" (simply $ limitPresent $ repoUUID pcd)
	, SimpleToken "securehash" (simply limitSecureHash)
	, ValueToken "copies" (usev limitCopies)
	, ValueToken "lackingcopies" (usev $ limitLackingCopies False)
	, ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
	, ValueToken "inbacked" (usev limitInBackend)
	, ValueToken "metadata" (usev limitMetaData)
	, ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd)
	] ++ commonKeyedTokens

preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
preferredContentTokens pcd = concat
	[ preferredContentKeylessTokens pcd
	, preferredContentKeyedTokens pcd
	]

preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)]
preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher

mkLargeFilesParser :: Annex (String -> [ParseResult (MatchFiles Annex)])
mkLargeFilesParser = do
#ifdef WITH_MAGICMIME
	magicmime <- liftIO initMagicMime
	let mimer n f = ValueToken n (usev $ f magicmime)
#else
	let mimer n = ValueToken n $ 
		const $ Left $ "\""++n++"\" not supported; not built with MagicMime support"
#endif
	let parse = parseToken $
		commonKeyedTokens ++
		commonKeylessTokens LimitDiskFiles ++
#ifdef WITH_MAGICMIME
		[ mimer "mimetype" $
			matchMagic "mimetype" getMagicMimeType providedMimeType
		, mimer "mimeencoding" $
			matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding
		]
#else
		[ mimer "mimetype"
		, mimer "mimeencoding"
		]
#endif
	return $ map parse . tokenizeMatcher
  where

{- Generates a matcher for files large enough (or meeting other criteria)
 - to be added to the annex, rather than directly to git. -}
largeFilesMatcher :: Annex GetFileMatcher
largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
  where
	go (Just expr) = do
		matcher <- mkmatcher expr
		return $ const $ return matcher
	go Nothing = return $ \file -> do
		expr <- checkAttr "annex.largefiles" file
		if null expr || expr == unspecifiedAttr
			then return matchAll
			else mkmatcher expr

	mkmatcher expr = do
		parser <- mkLargeFilesParser
		either badexpr return $ parsedToMatcher $ parser expr
	badexpr e = giveup $ "bad annex.largefiles configuration: " ++ e

simply :: MatchFiles Annex -> ParseResult (MatchFiles Annex)
simply = Right . Operation

usev :: MkLimit Annex -> String -> ParseResult (MatchFiles Annex)
usev a v = Operation <$> a v

call :: Either String (FileMatcher Annex) -> ParseResult (MatchFiles Annex)
call (Right sub) = Right $ Operation $ \notpresent mi ->
	matchMrun sub $ \a -> a notpresent mi
call (Left err) = Left err