
Contents of /genpatches-2.6/trunk/3.4/1019_linux-3.4.20.patch



Revision 2254
Wed Dec 19 19:51:16 2012 UTC by mpagano
File size: 186622 byte(s)
Linux patches 3.4.12 through and including 3.4.24
1 diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
2 index 9b1067a..68c5411 100644
3 --- a/Documentation/cgroups/memory.txt
4 +++ b/Documentation/cgroups/memory.txt
5 @@ -466,6 +466,10 @@ Note:
6 5.3 swappiness
7
8 Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
9 +Please note that unlike the global swappiness, memcg knob set to 0
10 +really prevents from any swapping even if there is a swap storage
11 +available. This might lead to memcg OOM killer if there are no file
12 +pages to reclaim.
13
14 Following cgroups' swappiness can't be changed.
15 - root cgroup (uses /proc/sys/vm/swappiness).
16 diff --git a/Makefile b/Makefile
17 index e264929..9c89559 100644
18 --- a/Makefile
19 +++ b/Makefile
20 @@ -1,6 +1,6 @@
21 VERSION = 3
22 PATCHLEVEL = 4
23 -SUBLEVEL = 19
24 +SUBLEVEL = 20
25 EXTRAVERSION =
26 NAME = Saber-toothed Squirrel
27
28 diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/arch/arm/plat-omap/include/plat/omap-serial.h
29 index c369c9d..9ff4444 100644
30 --- a/arch/arm/plat-omap/include/plat/omap-serial.h
31 +++ b/arch/arm/plat-omap/include/plat/omap-serial.h
32 @@ -42,10 +42,10 @@
33 #define OMAP_UART_WER_MOD_WKUP 0X7F
34
35 /* Enable XON/XOFF flow control on output */
36 -#define OMAP_UART_SW_TX 0x8
37 +#define OMAP_UART_SW_TX 0x04
38
39 /* Enable XON/XOFF flow control on input */
40 -#define OMAP_UART_SW_RX 0x2
41 +#define OMAP_UART_SW_RX 0x04
42
43 #define OMAP_UART_SYSC_RESET 0X07
44 #define OMAP_UART_TCR_TRIG 0X0F
45 diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
46 index 60e8866..93fe83e 100644
47 --- a/arch/m68k/include/asm/signal.h
48 +++ b/arch/m68k/include/asm/signal.h
49 @@ -156,7 +156,7 @@ typedef struct sigaltstack {
50 static inline void sigaddset(sigset_t *set, int _sig)
51 {
52 asm ("bfset %0{%1,#1}"
53 - : "+od" (*set)
54 + : "+o" (*set)
55 : "id" ((_sig - 1) ^ 31)
56 : "cc");
57 }
58 @@ -164,7 +164,7 @@ static inline void sigaddset(sigset_t *set, int _sig)
59 static inline void sigdelset(sigset_t *set, int _sig)
60 {
61 asm ("bfclr %0{%1,#1}"
62 - : "+od" (*set)
63 + : "+o" (*set)
64 : "id" ((_sig - 1) ^ 31)
65 : "cc");
66 }
67 @@ -180,7 +180,7 @@ static inline int __gen_sigismember(sigset_t *set, int _sig)
68 int ret;
69 asm ("bfextu %1{%2,#1},%0"
70 : "=d" (ret)
71 - : "od" (*set), "id" ((_sig-1) ^ 31)
72 + : "o" (*set), "id" ((_sig-1) ^ 31)
73 : "cc");
74 return ret;
75 }
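
The m68k hunk above relaxes the asm constraint from "od" to "o": bfset/bfclr/bfextu address the sigset as a bitfield in memory that can span words, so the data-register alternative must not be offered to the compiler. As a hedged, standalone illustration of only the index math (plain C, not the inline asm), "(_sig - 1) ^ 31" converts the 1-based signal number into the MSB-first bit offset those instructions count from:

    #include <assert.h>
    #include <stdint.h>

    /* Illustrative only: mimic sigaddset()'s bookkeeping in portable C.
     * m68k bitfield instructions count bits from the most-significant
     * end, hence the "^ 31" conversion of the LSB-first index. */
    static void sigaddset_c(uint32_t *set, int sig)
    {
        int bit = sig - 1;               /* signal numbers are 1-based */
        set[bit / 32] |= 1u << (bit % 32);
    }

    static int bf_offset(int sig)
    {
        return (sig - 1) ^ 31;           /* MSB-first offset in the word */
    }

    int main(void)
    {
        uint32_t set[2] = { 0, 0 };
        sigaddset_c(set, 1);             /* SIGHUP: bit 0, bf offset 31 */
        assert(set[0] == 1u);
        assert(bf_offset(1) == 31);
        assert(bf_offset(32) == 0);
        return 0;
    }
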
76 diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
77 index 234f1d8..2e0a15b 100644
78 --- a/arch/s390/include/asm/compat.h
79 +++ b/arch/s390/include/asm/compat.h
80 @@ -20,7 +20,7 @@
81 #define PSW32_MASK_CC 0x00003000UL
82 #define PSW32_MASK_PM 0x00000f00UL
83
84 -#define PSW32_MASK_USER 0x00003F00UL
85 +#define PSW32_MASK_USER 0x0000FF00UL
86
87 #define PSW32_ADDR_AMODE 0x80000000UL
88 #define PSW32_ADDR_INSN 0x7FFFFFFFUL
89 diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
90 index aeb77f0..d3750e7 100644
91 --- a/arch/s390/include/asm/ptrace.h
92 +++ b/arch/s390/include/asm/ptrace.h
93 @@ -240,7 +240,7 @@ typedef struct
94 #define PSW_MASK_EA 0x00000000UL
95 #define PSW_MASK_BA 0x00000000UL
96
97 -#define PSW_MASK_USER 0x00003F00UL
98 +#define PSW_MASK_USER 0x0000FF00UL
99
100 #define PSW_ADDR_AMODE 0x80000000UL
101 #define PSW_ADDR_INSN 0x7FFFFFFFUL
102 @@ -269,7 +269,7 @@ typedef struct
103 #define PSW_MASK_EA 0x0000000100000000UL
104 #define PSW_MASK_BA 0x0000000080000000UL
105
106 -#define PSW_MASK_USER 0x00003F0180000000UL
107 +#define PSW_MASK_USER 0x0000FF0180000000UL
108
109 #define PSW_ADDR_AMODE 0x0000000000000000UL
110 #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL
111 diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
112 index 28040fd..0bdca3a 100644
113 --- a/arch/s390/kernel/compat_signal.c
114 +++ b/arch/s390/kernel/compat_signal.c
115 @@ -313,6 +313,10 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
116 regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
117 (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32 |
118 (__u64)(regs32.psw.addr & PSW32_ADDR_AMODE);
119 + /* Check for invalid user address space control. */
120 + if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC))
121 + regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) |
122 + (regs->psw.mask & ~PSW_MASK_ASC);
123 regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN);
124 for (i = 0; i < NUM_GPRS; i++)
125 regs->gprs[i] = (__u64) regs32.gprs[i];
126 @@ -494,7 +498,10 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
127
128 /* Set up registers for signal handler */
129 regs->gprs[15] = (__force __u64) frame;
130 - regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */
131 + /* Force 31 bit amode and default user address space control. */
132 + regs->psw.mask = PSW_MASK_BA |
133 + (psw_user_bits & PSW_MASK_ASC) |
134 + (regs->psw.mask & ~PSW_MASK_ASC);
135 regs->psw.addr = (__force __u64) ka->sa.sa_handler;
136
137 regs->gprs[2] = map_signal(sig);
138 @@ -562,7 +569,10 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
139
140 /* Set up registers for signal handler */
141 regs->gprs[15] = (__force __u64) frame;
142 - regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */
143 + /* Force 31 bit amode and default user address space control. */
144 + regs->psw.mask = PSW_MASK_BA |
145 + (psw_user_bits & PSW_MASK_ASC) |
146 + (regs->psw.mask & ~PSW_MASK_ASC);
147 regs->psw.addr = (__u64) ka->sa.sa_handler;
148
149 regs->gprs[2] = map_signal(sig);
150 diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
151 index f7582b2..74f58e2 100644
152 --- a/arch/s390/kernel/signal.c
153 +++ b/arch/s390/kernel/signal.c
154 @@ -148,6 +148,10 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
155 /* Use regs->psw.mask instead of psw_user_bits to preserve PER bit. */
156 regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
157 (user_sregs.regs.psw.mask & PSW_MASK_USER);
158 + /* Check for invalid user address space control. */
159 + if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC))
160 + regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) |
161 + (regs->psw.mask & ~PSW_MASK_ASC);
162 /* Check for invalid amode */
163 if (regs->psw.mask & PSW_MASK_EA)
164 regs->psw.mask |= PSW_MASK_BA;
165 @@ -294,7 +298,10 @@ static int setup_frame(int sig, struct k_sigaction *ka,
166
167 /* Set up registers for signal handler */
168 regs->gprs[15] = (unsigned long) frame;
169 - regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */
170 + /* Force default amode and default user address space control. */
171 + regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
172 + (psw_user_bits & PSW_MASK_ASC) |
173 + (regs->psw.mask & ~PSW_MASK_ASC);
174 regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
175
176 regs->gprs[2] = map_signal(sig);
177 @@ -367,7 +374,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
178
179 /* Set up registers for signal handler */
180 regs->gprs[15] = (unsigned long) frame;
181 - regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */
182 + /* Force default amode and default user address space control. */
183 + regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
184 + (psw_user_bits & PSW_MASK_ASC) |
185 + (regs->psw.mask & ~PSW_MASK_ASC);
186 regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
187
188 regs->gprs[2] = map_signal(sig);
189 diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
190 index 65cb06e..4ccf9f5 100644
191 --- a/arch/s390/mm/gup.c
192 +++ b/arch/s390/mm/gup.c
193 @@ -183,7 +183,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
194 addr = start;
195 len = (unsigned long) nr_pages << PAGE_SHIFT;
196 end = start + len;
197 - if (end < start)
198 + if ((end < start) || (end > TASK_SIZE))
199 goto slow_irqon;
200
201 /*
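
The s390 get_user_pages_fast() hunk adds a second bailout: besides arithmetic wrap-around, any range that reaches beyond the user address space limit is now punted to the slow path, which can fault and fail gracefully. A standalone sketch of the check (TASK_SIZE_DEMO is a made-up stand-in; the real TASK_SIZE depends on the architecture and task):

    #include <assert.h>

    #define TASK_SIZE_DEMO 0x0000800000000000UL  /* assumed for the demo */

    /* Mirror of the fixed condition: take the slow path both when the
     * arithmetic wraps and when the range leaves user space. */
    static int needs_slow_path(unsigned long start, unsigned long len)
    {
        unsigned long end = start + len;
        return (end < start) || (end > TASK_SIZE_DEMO);
    }

    int main(void)
    {
        assert(needs_slow_path(~0UL - 4096, 8192));           /* wraps */
        assert(needs_slow_path(TASK_SIZE_DEMO - 4096, 8192)); /* past limit */
        assert(!needs_slow_path(4096, 4096));                 /* fine */
        return 0;
    }
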
202 diff --git a/crypto/cryptd.c b/crypto/cryptd.c
203 index 671d4d6..7bdd61b 100644
204 --- a/crypto/cryptd.c
205 +++ b/crypto/cryptd.c
206 @@ -137,13 +137,18 @@ static void cryptd_queue_worker(struct work_struct *work)
207 struct crypto_async_request *req, *backlog;
208
209 cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
210 - /* Only handle one request at a time to avoid hogging crypto
211 - * workqueue. preempt_disable/enable is used to prevent
212 - * being preempted by cryptd_enqueue_request() */
213 + /*
214 + * Only handle one request at a time to avoid hogging crypto workqueue.
215 + * preempt_disable/enable is used to prevent being preempted by
216 + * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent
217 + * cryptd_enqueue_request() being accessed from software interrupts.
218 + */
219 + local_bh_disable();
220 preempt_disable();
221 backlog = crypto_get_backlog(&cpu_queue->queue);
222 req = crypto_dequeue_request(&cpu_queue->queue);
223 preempt_enable();
224 + local_bh_enable();
225
226 if (!req)
227 return;
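
The cryptd change wraps the existing preempt_disable() section in local_bh_disable()/local_bh_enable(), because cryptd_enqueue_request() can also run from a software interrupt and would otherwise race with this dequeue on the same per-cpu queue. A sketch of the nesting discipline only (kernel context assumed, not compilable on its own):

    /* Sketch only: the region must be closed in strict reverse order
     * of how it was opened. */
    local_bh_disable();     /* keep softirqs, and their enqueues, off this CPU */
    preempt_disable();      /* keep other tasks off this CPU */

    /* ... pull exactly one request off the per-cpu queue ... */

    preempt_enable();
    local_bh_enable();      /* enqueue from softirq context may run again */
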
228 diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
229 index 48b5a3c..62d9ee6 100644
230 --- a/drivers/acpi/video.c
231 +++ b/drivers/acpi/video.c
232 @@ -1345,12 +1345,15 @@ static int
233 acpi_video_bus_get_devices(struct acpi_video_bus *video,
234 struct acpi_device *device)
235 {
236 - int status;
237 + int status = 0;
238 struct acpi_device *dev;
239
240 - status = acpi_video_device_enumerate(video);
241 - if (status)
242 - return status;
243 + /*
244 + * There are systems where video module known to work fine regardless
245 + * of broken _DOD and ignoring returned value here doesn't cause
246 + * any issues later.
247 + */
248 + acpi_video_device_enumerate(video);
249
250 list_for_each_entry(dev, &device->children, node) {
251
252 diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
253 index 013c7a5..7b33136 100644
254 --- a/drivers/block/rbd.c
255 +++ b/drivers/block/rbd.c
256 @@ -175,8 +175,7 @@ struct rbd_device {
257 /* protects updating the header */
258 struct rw_semaphore header_rwsem;
259 char snap_name[RBD_MAX_SNAP_NAME_LEN];
260 - u32 cur_snap; /* index+1 of current snapshot within snap context
261 - 0 - for the head */
262 + u64 snap_id; /* current snapshot id */
263 int read_only;
264
265 struct list_head node;
266 @@ -450,7 +449,9 @@ static void rbd_client_release(struct kref *kref)
267 struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
268
269 dout("rbd_release_client %p\n", rbdc);
270 + spin_lock(&rbd_client_list_lock);
271 list_del(&rbdc->node);
272 + spin_unlock(&rbd_client_list_lock);
273
274 ceph_destroy_client(rbdc->client);
275 kfree(rbdc->rbd_opts);
276 @@ -463,9 +464,7 @@ static void rbd_client_release(struct kref *kref)
277 */
278 static void rbd_put_client(struct rbd_device *rbd_dev)
279 {
280 - spin_lock(&rbd_client_list_lock);
281 kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
282 - spin_unlock(&rbd_client_list_lock);
283 rbd_dev->rbd_client = NULL;
284 }
285
286 @@ -498,7 +497,7 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
287
288 snap_count = le32_to_cpu(ondisk->snap_count);
289 header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
290 - snap_count * sizeof (*ondisk),
291 + snap_count * sizeof(u64),
292 gfp_flags);
293 if (!header->snapc)
294 return -ENOMEM;
295 @@ -552,21 +551,6 @@ err_snapc:
296 return -ENOMEM;
297 }
298
299 -static int snap_index(struct rbd_image_header *header, int snap_num)
300 -{
301 - return header->total_snaps - snap_num;
302 -}
303 -
304 -static u64 cur_snap_id(struct rbd_device *rbd_dev)
305 -{
306 - struct rbd_image_header *header = &rbd_dev->header;
307 -
308 - if (!rbd_dev->cur_snap)
309 - return 0;
310 -
311 - return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)];
312 -}
313 -
314 static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
315 u64 *seq, u64 *size)
316 {
317 @@ -605,7 +589,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
318 snapc->seq = header->snap_seq;
319 else
320 snapc->seq = 0;
321 - dev->cur_snap = 0;
322 + dev->snap_id = CEPH_NOSNAP;
323 dev->read_only = 0;
324 if (size)
325 *size = header->image_size;
326 @@ -613,8 +597,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
327 ret = snap_by_name(header, dev->snap_name, &snapc->seq, size);
328 if (ret < 0)
329 goto done;
330 -
331 - dev->cur_snap = header->total_snaps - ret;
332 + dev->snap_id = snapc->seq;
333 dev->read_only = 1;
334 }
335
336 @@ -1521,7 +1504,7 @@ static void rbd_rq_fn(struct request_queue *q)
337 coll, cur_seg);
338 else
339 rbd_req_read(rq, rbd_dev,
340 - cur_snap_id(rbd_dev),
341 + rbd_dev->snap_id,
342 ofs,
343 op_size, bio,
344 coll, cur_seg);
345 @@ -1656,7 +1639,7 @@ static int rbd_header_add_snap(struct rbd_device *dev,
346 struct ceph_mon_client *monc;
347
348 /* we should create a snapshot only if we're pointing at the head */
349 - if (dev->cur_snap)
350 + if (dev->snap_id != CEPH_NOSNAP)
351 return -EINVAL;
352
353 monc = &dev->rbd_client->client->monc;
354 @@ -1683,7 +1666,9 @@ static int rbd_header_add_snap(struct rbd_device *dev,
355 if (ret < 0)
356 return ret;
357
358 - dev->header.snapc->seq = new_snapid;
359 + down_write(&dev->header_rwsem);
360 + dev->header.snapc->seq = new_snapid;
361 + up_write(&dev->header_rwsem);
362
363 return 0;
364 bad:
365 diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
366 index 80b331c..5ba5e66 100644
367 --- a/drivers/gpu/drm/i915/intel_overlay.c
368 +++ b/drivers/gpu/drm/i915/intel_overlay.c
369 @@ -427,9 +427,17 @@ static int intel_overlay_off(struct intel_overlay *overlay)
370 OUT_RING(flip_addr);
371 OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
372 /* turn overlay off */
373 - OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
374 - OUT_RING(flip_addr);
375 - OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
376 + if (IS_I830(dev)) {
377 + /* Workaround: Don't disable the overlay fully, since otherwise
378 + * it dies on the next OVERLAY_ON cmd. */
379 + OUT_RING(MI_NOOP);
380 + OUT_RING(MI_NOOP);
381 + OUT_RING(MI_NOOP);
382 + } else {
383 + OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
384 + OUT_RING(flip_addr);
385 + OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
386 + }
387 ADVANCE_LP_RING();
388
389 return intel_overlay_do_wait_request(overlay, request,
390 diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
391 index 2d07fbf..f6176bc 100644
392 --- a/drivers/gpu/drm/radeon/atombios_encoders.c
393 +++ b/drivers/gpu/drm/radeon/atombios_encoders.c
394 @@ -1421,7 +1421,7 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode)
395 atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0);
396 atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
397 /* some early dce3.2 boards have a bug in their transmitter control table */
398 - if ((rdev->family != CHIP_RV710) || (rdev->family != CHIP_RV730))
399 + if ((rdev->family != CHIP_RV710) && (rdev->family != CHIP_RV730))
400 atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0);
401 }
402 if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) {
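
The one-character radeon fix repairs a classic boolean slip: "(f != A) || (f != B)" is true for every possible f, so the transmitter-control call that should have been skipped on the buggy RV710/RV730 boards was never actually suppressed. A minimal standalone C demonstration (the CHIP_* values here are arbitrary stand-ins):

    #include <assert.h>

    enum chip { CHIP_RV710 = 1, CHIP_RV730, CHIP_OTHER };

    /* The buggy form holds for every chip, since any value differs from
     * at least one of the two constants; De Morgan gives the intent. */
    static int buggy(enum chip f) { return (f != CHIP_RV710) || (f != CHIP_RV730); }
    static int fixed(enum chip f) { return (f != CHIP_RV710) && (f != CHIP_RV730); }

    int main(void)
    {
        assert(buggy(CHIP_RV710) && buggy(CHIP_RV730)); /* always true */
        assert(!fixed(CHIP_RV710) && !fixed(CHIP_RV730));
        assert(fixed(CHIP_OTHER));
        return 0;
    }
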
403 diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
404 index ebc6fac..578207e 100644
405 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
406 +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
407 @@ -749,7 +749,10 @@ static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
408 /* clear the pages coming from the pool if requested */
409 if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
410 list_for_each_entry(p, &plist, lru) {
411 - clear_page(page_address(p));
412 + if (PageHighMem(p))
413 + clear_highpage(p);
414 + else
415 + clear_page(page_address(p));
416 }
417 }
418
419 diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
420 index 589753f..2b78ddd 100644
421 --- a/drivers/net/ethernet/marvell/sky2.c
422 +++ b/drivers/net/ethernet/marvell/sky2.c
423 @@ -3079,8 +3079,10 @@ static irqreturn_t sky2_intr(int irq, void *dev_id)
424
425 /* Reading this mask interrupts as side effect */
426 status = sky2_read32(hw, B0_Y2_SP_ISRC2);
427 - if (status == 0 || status == ~0)
428 + if (status == 0 || status == ~0) {
429 + sky2_write32(hw, B0_Y2_SP_ICR, 2);
430 return IRQ_NONE;
431 + }
432
433 prefetch(&hw->st_le[hw->st_idx]);
434
435 diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
436 index 482dcd3..0dc70c2 100644
437 --- a/drivers/net/ethernet/realtek/r8169.c
438 +++ b/drivers/net/ethernet/realtek/r8169.c
439 @@ -73,7 +73,7 @@
440 static const int multicast_filter_limit = 32;
441
442 #define MAX_READ_REQUEST_SHIFT 12
443 -#define TX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */
444 +#define TX_DMA_BURST 7 /* Maximum PCI burst, '7' is unlimited */
445 #define SafeMtu 0x1c20 /* ... actually life sucks beyond ~7k */
446 #define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */
447
448 @@ -3488,6 +3488,8 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
449 void __iomem *ioaddr = tp->mmio_addr;
450
451 switch (tp->mac_version) {
452 + case RTL_GIGA_MAC_VER_25:
453 + case RTL_GIGA_MAC_VER_26:
454 case RTL_GIGA_MAC_VER_29:
455 case RTL_GIGA_MAC_VER_30:
456 case RTL_GIGA_MAC_VER_32:
457 @@ -4129,6 +4131,9 @@ static void rtl_set_rx_mode(struct net_device *dev)
458 mc_filter[1] = swab32(data);
459 }
460
461 + if (tp->mac_version == RTL_GIGA_MAC_VER_35)
462 + mc_filter[1] = mc_filter[0] = 0xffffffff;
463 +
464 RTL_W32(MAR0 + 4, mc_filter[1]);
465 RTL_W32(MAR0 + 0, mc_filter[0]);
466
467 diff --git a/drivers/staging/android/android_alarm.h b/drivers/staging/android/android_alarm.h
468 index 66b6e3d..6eecbde 100644
469 --- a/drivers/staging/android/android_alarm.h
470 +++ b/drivers/staging/android/android_alarm.h
471 @@ -110,12 +110,10 @@ enum android_alarm_return_flags {
472 #define ANDROID_ALARM_WAIT _IO('a', 1)
473
474 #define ALARM_IOW(c, type, size) _IOW('a', (c) | ((type) << 4), size)
475 -#define ALARM_IOR(c, type, size) _IOR('a', (c) | ((type) << 4), size)
476 -
477 /* Set alarm */
478 #define ANDROID_ALARM_SET(type) ALARM_IOW(2, type, struct timespec)
479 #define ANDROID_ALARM_SET_AND_WAIT(type) ALARM_IOW(3, type, struct timespec)
480 -#define ANDROID_ALARM_GET_TIME(type) ALARM_IOR(4, type, struct timespec)
481 +#define ANDROID_ALARM_GET_TIME(type) ALARM_IOW(4, type, struct timespec)
482 #define ANDROID_ALARM_SET_RTC _IOW('a', 5, struct timespec)
483 #define ANDROID_ALARM_BASE_CMD(cmd) (cmd & ~(_IOC(0, 0, 0xf0, 0)))
484 #define ANDROID_ALARM_IOCTL_TO_TYPE(cmd) (_IOC_NR(cmd) >> 4)
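
The Android alarm hunk switches ANDROID_ALARM_GET_TIME from _IOR to _IOW encoding and drops the now-unused ALARM_IOR helper. The direction bits are baked into the ioctl command number itself, so the two spellings produce different values on the wire, as this standalone demo shows (the DEMO_* macros are hypothetical stand-ins for the header's):

    #include <stdio.h>
    #include <time.h>
    #include <linux/ioctl.h>

    /* Stand-ins mirroring the header's nr encoding: low nibble is the
     * command, high nibble carries the alarm type. */
    #define DEMO_GET_TIME_R(type) _IOR('a', 4 | ((type) << 4), struct timespec)
    #define DEMO_GET_TIME_W(type) _IOW('a', 4 | ((type) << 4), struct timespec)

    int main(void)
    {
        /* Only the direction bits differ, yet the command numbers do too. */
        printf("_IOR variant: %#lx\n", (unsigned long)DEMO_GET_TIME_R(0));
        printf("_IOW variant: %#lx\n", (unsigned long)DEMO_GET_TIME_W(0));
        return 0;
    }
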
485 diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
486 index 6189923..d00b38e 100644
487 --- a/drivers/tty/serial/omap-serial.c
488 +++ b/drivers/tty/serial/omap-serial.c
489 @@ -649,19 +649,19 @@ serial_omap_configure_xonxoff
490
491 /*
492 * IXON Flag:
493 - * Flow control for OMAP.TX
494 - * OMAP.RX should listen for XON/XOFF
495 + * Enable XON/XOFF flow control on output.
496 + * Transmit XON1, XOFF1
497 */
498 if (termios->c_iflag & IXON)
499 - up->efr |= OMAP_UART_SW_RX;
500 + up->efr |= OMAP_UART_SW_TX;
501
502 /*
503 * IXOFF Flag:
504 - * Flow control for OMAP.RX
505 - * OMAP.TX should send XON/XOFF
506 + * Enable XON/XOFF flow control on input.
507 + * Receiver compares XON1, XOFF1.
508 */
509 if (termios->c_iflag & IXOFF)
510 - up->efr |= OMAP_UART_SW_TX;
511 + up->efr |= OMAP_UART_SW_RX;
512
513 serial_out(up, UART_EFR, up->efr | UART_EFR_ECB);
514 serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
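
The omap-serial hunk swaps which EFR bit each termios flag sets: IXON enables software flow control on output (our transmitter obeys an XOFF from the far end), so it must map to OMAP_UART_SW_TX, while IXOFF enables it on input (we send XON/XOFF to throttle the sender), mapping to OMAP_UART_SW_RX; the driver had the two crossed, and the header hunk earlier in this patch corrects the bit values to match. A userspace analogue of the flag semantics (illustrative only):

    #include <termios.h>

    /* IXON:  obey received XOFF on the transmit side.
     * IXOFF: emit XON/XOFF ourselves to pace the receive side. */
    static void apply_sw_flow(struct termios *tio, int on_output, int on_input)
    {
        tio->c_iflag &= ~(IXON | IXOFF);
        if (on_output)
            tio->c_iflag |= IXON;
        if (on_input)
            tio->c_iflag |= IXOFF;
    }

    int main(void)
    {
        struct termios tio = { 0 };
        apply_sw_flow(&tio, 1, 0);       /* like IXON set, IXOFF clear */
        return tio.c_iflag == IXON ? 0 : 1;
    }
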
515 diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
516 index 17ec21e..43aa36b 100644
517 --- a/drivers/usb/serial/option.c
518 +++ b/drivers/usb/serial/option.c
519 @@ -157,6 +157,7 @@ static void option_instat_callback(struct urb *urb);
520 #define NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_HIGHSPEED 0x8001
521 #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_FULLSPEED 0x9000
522 #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED 0x9001
523 +#define NOVATELWIRELESS_PRODUCT_E362 0x9010
524 #define NOVATELWIRELESS_PRODUCT_G1 0xA001
525 #define NOVATELWIRELESS_PRODUCT_G1_M 0xA002
526 #define NOVATELWIRELESS_PRODUCT_G2 0xA010
527 @@ -192,6 +193,9 @@ static void option_instat_callback(struct urb *urb);
528 #define DELL_PRODUCT_5730_MINICARD_TELUS 0x8181
529 #define DELL_PRODUCT_5730_MINICARD_VZW 0x8182
530
531 +#define DELL_PRODUCT_5800_MINICARD_VZW 0x8195 /* Novatel E362 */
532 +#define DELL_PRODUCT_5800_V2_MINICARD_VZW 0x8196 /* Novatel E362 */
533 +
534 #define KYOCERA_VENDOR_ID 0x0c88
535 #define KYOCERA_PRODUCT_KPC650 0x17da
536 #define KYOCERA_PRODUCT_KPC680 0x180a
537 @@ -282,6 +286,7 @@ static void option_instat_callback(struct urb *urb);
538 /* ALCATEL PRODUCTS */
539 #define ALCATEL_VENDOR_ID 0x1bbb
540 #define ALCATEL_PRODUCT_X060S_X200 0x0000
541 +#define ALCATEL_PRODUCT_X220_X500D 0x0017
542
543 #define PIRELLI_VENDOR_ID 0x1266
544 #define PIRELLI_PRODUCT_C100_1 0x1002
545 @@ -705,6 +710,7 @@ static const struct usb_device_id option_ids[] = {
546 { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G2) },
547 /* Novatel Ovation MC551 a.k.a. Verizon USB551L */
548 { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) },
549 + { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E362, 0xff, 0xff, 0xff) },
550
551 { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) },
552 { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) },
553 @@ -727,6 +733,8 @@ static const struct usb_device_id option_ids[] = {
554 { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_SPRINT) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */
555 { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_TELUS) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */
556 { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_VZW) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */
557 + { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_MINICARD_VZW, 0xff, 0xff, 0xff) },
558 + { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_V2_MINICARD_VZW, 0xff, 0xff, 0xff) },
559 { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */
560 { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) },
561 { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) },
562 @@ -1156,6 +1164,7 @@ static const struct usb_device_id option_ids[] = {
563 { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200),
564 .driver_info = (kernel_ulong_t)&alcatel_x200_blacklist
565 },
566 + { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X220_X500D) },
567 { USB_DEVICE(AIRPLUS_VENDOR_ID, AIRPLUS_PRODUCT_MCD650) },
568 { USB_DEVICE(TLAYTECH_VENDOR_ID, TLAYTECH_PRODUCT_TEU800) },
569 { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14),
570 diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
571 index bcf2617..c627ba2 100644
572 --- a/drivers/usb/serial/usb-serial.c
573 +++ b/drivers/usb/serial/usb-serial.c
574 @@ -768,7 +768,7 @@ int usb_serial_probe(struct usb_interface *interface,
575
576 if (retval) {
577 dbg("sub driver rejected device");
578 - kfree(serial);
579 + usb_serial_put(serial);
580 module_put(type->driver.owner);
581 return retval;
582 }
583 @@ -840,7 +840,7 @@ int usb_serial_probe(struct usb_interface *interface,
584 */
585 if (num_bulk_in == 0 || num_bulk_out == 0) {
586 dev_info(&interface->dev, "PL-2303 hack: descriptors matched but endpoints did not\n");
587 - kfree(serial);
588 + usb_serial_put(serial);
589 module_put(type->driver.owner);
590 return -ENODEV;
591 }
592 @@ -854,7 +854,7 @@ int usb_serial_probe(struct usb_interface *interface,
593 if (num_ports == 0) {
594 dev_err(&interface->dev,
595 "Generic device with no bulk out, not allowed.\n");
596 - kfree(serial);
597 + usb_serial_put(serial);
598 module_put(type->driver.owner);
599 return -EIO;
600 }
601 diff --git a/drivers/xen/events.c b/drivers/xen/events.c
602 index 6908e4c..26c47a4 100644
603 --- a/drivers/xen/events.c
604 +++ b/drivers/xen/events.c
605 @@ -1365,8 +1365,8 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
606 {
607 struct pt_regs *old_regs = set_irq_regs(regs);
608
609 - exit_idle();
610 irq_enter();
611 + exit_idle();
612
613 __xen_evtchn_do_upcall();
614
615 diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
616 index 173b1d2..32ee086 100644
617 --- a/fs/ceph/addr.c
618 +++ b/fs/ceph/addr.c
619 @@ -54,7 +54,12 @@
620 (CONGESTION_ON_THRESH(congestion_kb) - \
621 (CONGESTION_ON_THRESH(congestion_kb) >> 2))
622
623 -
624 +static inline struct ceph_snap_context *page_snap_context(struct page *page)
625 +{
626 + if (PagePrivate(page))
627 + return (void *)page->private;
628 + return NULL;
629 +}
630
631 /*
632 * Dirty a page. Optimistically adjust accounting, on the assumption
633 @@ -142,10 +147,9 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset)
634 {
635 struct inode *inode;
636 struct ceph_inode_info *ci;
637 - struct ceph_snap_context *snapc = (void *)page->private;
638 + struct ceph_snap_context *snapc = page_snap_context(page);
639
640 BUG_ON(!PageLocked(page));
641 - BUG_ON(!page->private);
642 BUG_ON(!PagePrivate(page));
643 BUG_ON(!page->mapping);
644
645 @@ -182,7 +186,6 @@ static int ceph_releasepage(struct page *page, gfp_t g)
646 struct inode *inode = page->mapping ? page->mapping->host : NULL;
647 dout("%p releasepage %p idx %lu\n", inode, page, page->index);
648 WARN_ON(PageDirty(page));
649 - WARN_ON(page->private);
650 WARN_ON(PagePrivate(page));
651 return 0;
652 }
653 @@ -202,7 +205,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
654 dout("readpage inode %p file %p page %p index %lu\n",
655 inode, filp, page, page->index);
656 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
657 - page->index << PAGE_CACHE_SHIFT, &len,
658 + (u64) page_offset(page), &len,
659 ci->i_truncate_seq, ci->i_truncate_size,
660 &page, 1, 0);
661 if (err == -ENOENT)
662 @@ -283,7 +286,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
663 int nr_pages = 0;
664 int ret;
665
666 - off = page->index << PAGE_CACHE_SHIFT;
667 + off = (u64) page_offset(page);
668
669 /* count pages */
670 next_index = page->index;
671 @@ -423,7 +426,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
672 struct ceph_inode_info *ci;
673 struct ceph_fs_client *fsc;
674 struct ceph_osd_client *osdc;
675 - loff_t page_off = page->index << PAGE_CACHE_SHIFT;
676 + loff_t page_off = page_offset(page);
677 int len = PAGE_CACHE_SIZE;
678 loff_t i_size;
679 int err = 0;
680 @@ -443,7 +446,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
681 osdc = &fsc->client->osdc;
682
683 /* verify this is a writeable snap context */
684 - snapc = (void *)page->private;
685 + snapc = page_snap_context(page);
686 if (snapc == NULL) {
687 dout("writepage %p page %p not dirty?\n", inode, page);
688 goto out;
689 @@ -451,7 +454,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
690 oldest = get_oldest_context(inode, &snap_size);
691 if (snapc->seq > oldest->seq) {
692 dout("writepage %p page %p snapc %p not writeable - noop\n",
693 - inode, page, (void *)page->private);
694 + inode, page, snapc);
695 /* we should only noop if called by kswapd */
696 WARN_ON((current->flags & PF_MEMALLOC) == 0);
697 ceph_put_snap_context(oldest);
698 @@ -591,7 +594,7 @@ static void writepages_finish(struct ceph_osd_request *req,
699 clear_bdi_congested(&fsc->backing_dev_info,
700 BLK_RW_ASYNC);
701
702 - ceph_put_snap_context((void *)page->private);
703 + ceph_put_snap_context(page_snap_context(page));
704 page->private = 0;
705 ClearPagePrivate(page);
706 dout("unlocking %d %p\n", i, page);
707 @@ -795,7 +798,7 @@ get_more_pages:
708 }
709
710 /* only if matching snap context */
711 - pgsnapc = (void *)page->private;
712 + pgsnapc = page_snap_context(page);
713 if (pgsnapc->seq > snapc->seq) {
714 dout("page snapc %p %lld > oldest %p %lld\n",
715 pgsnapc, pgsnapc->seq, snapc, snapc->seq);
716 @@ -814,8 +817,7 @@ get_more_pages:
717 /* ok */
718 if (locked_pages == 0) {
719 /* prepare async write request */
720 - offset = (unsigned long long)page->index
721 - << PAGE_CACHE_SHIFT;
722 + offset = (u64) page_offset(page);
723 len = wsize;
724 req = ceph_osdc_new_request(&fsc->client->osdc,
725 &ci->i_layout,
726 @@ -984,7 +986,7 @@ retry_locked:
727 BUG_ON(!ci->i_snap_realm);
728 down_read(&mdsc->snap_rwsem);
729 BUG_ON(!ci->i_snap_realm->cached_context);
730 - snapc = (void *)page->private;
731 + snapc = page_snap_context(page);
732 if (snapc && snapc != ci->i_head_snapc) {
733 /*
734 * this page is already dirty in another (older) snap
735 @@ -1177,7 +1179,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
736 struct inode *inode = vma->vm_file->f_dentry->d_inode;
737 struct page *page = vmf->page;
738 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
739 - loff_t off = page->index << PAGE_CACHE_SHIFT;
740 + loff_t off = page_offset(page);
741 loff_t size, len;
742 int ret;
743
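
Several hunks in fs/ceph/addr.c replace open-coded "page->index << PAGE_CACHE_SHIFT" with page_offset(page). Beyond readability, the shift form computes in unsigned long, which on 32-bit silently truncates offsets at 4 GiB before they ever reach the 64-bit file offset; that is exactly what the (u64)/loff_t conversions avoid. A standalone demo (PAGE_SHIFT_DEMO assumes 4 KiB pages):

    #include <assert.h>
    #include <stdio.h>

    #define PAGE_SHIFT_DEMO 12           /* assume 4 KiB pages */

    int main(void)
    {
        unsigned long index = 0x120000;  /* page index past 4 GiB / 4 KiB */

        /* The 32-bit trap: shifting an unsigned long overflows before
         * the result is widened to a 64-bit file offset. */
        unsigned long shifted = index << PAGE_SHIFT_DEMO;
        long long     widened = (long long)index << PAGE_SHIFT_DEMO;

        printf("shifted (truncates if ulong is 32-bit): %#lx\n", shifted);
        printf("widened first:                          %#llx\n", widened);
        assert(widened == 0x120000000LL);
        return 0;
    }
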
744 diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
745 index fb962ef..6d59006 100644
746 --- a/fs/ceph/debugfs.c
747 +++ b/fs/ceph/debugfs.c
748 @@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
749 int err = -ENOMEM;
750
751 dout("ceph_fs_debugfs_init\n");
752 + BUG_ON(!fsc->client->debugfs_dir);
753 fsc->debugfs_congestion_kb =
754 debugfs_create_file("writeback_congestion_kb",
755 0600,
756 diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
757 index 89971e1..7f1682d 100644
758 --- a/fs/ceph/mds_client.c
759 +++ b/fs/ceph/mds_client.c
760 @@ -334,10 +334,10 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
761 dout("mdsc put_session %p %d -> %d\n", s,
762 atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
763 if (atomic_dec_and_test(&s->s_ref)) {
764 - if (s->s_authorizer)
765 + if (s->s_auth.authorizer)
766 s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer(
767 s->s_mdsc->fsc->client->monc.auth,
768 - s->s_authorizer);
769 + s->s_auth.authorizer);
770 kfree(s);
771 }
772 }
773 @@ -394,11 +394,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
774 s->s_seq = 0;
775 mutex_init(&s->s_mutex);
776
777 - ceph_con_init(mdsc->fsc->client->msgr, &s->s_con);
778 - s->s_con.private = s;
779 - s->s_con.ops = &mds_con_ops;
780 - s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
781 - s->s_con.peer_name.num = cpu_to_le64(mds);
782 + ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);
783
784 spin_lock_init(&s->s_gen_ttl_lock);
785 s->s_cap_gen = 0;
786 @@ -440,7 +436,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
787 mdsc->sessions[mds] = s;
788 atomic_inc(&s->s_ref); /* one ref to sessions[], one to caller */
789
790 - ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
791 + ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds,
792 + ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
793
794 return s;
795
796 @@ -2532,6 +2529,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
797 session->s_seq = 0;
798
799 ceph_con_open(&session->s_con,
800 + CEPH_ENTITY_TYPE_MDS, mds,
801 ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
802
803 /* replay unsafe requests */
804 @@ -2636,7 +2634,8 @@ static void check_new_map(struct ceph_mds_client *mdsc,
805 ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
806 session_state_name(s->s_state));
807
808 - if (memcmp(ceph_mdsmap_get_addr(oldmap, i),
809 + if (i >= newmap->m_max_mds ||
810 + memcmp(ceph_mdsmap_get_addr(oldmap, i),
811 ceph_mdsmap_get_addr(newmap, i),
812 sizeof(struct ceph_entity_addr))) {
813 if (s->s_state == CEPH_MDS_SESSION_OPENING) {
814 @@ -3395,39 +3394,33 @@ out:
815 /*
816 * authentication
817 */
818 -static int get_authorizer(struct ceph_connection *con,
819 - void **buf, int *len, int *proto,
820 - void **reply_buf, int *reply_len, int force_new)
821 +
822 +/*
823 + * Note: returned pointer is the address of a structure that's
824 + * managed separately. Caller must *not* attempt to free it.
825 + */
826 +static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
827 + int *proto, int force_new)
828 {
829 struct ceph_mds_session *s = con->private;
830 struct ceph_mds_client *mdsc = s->s_mdsc;
831 struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
832 - int ret = 0;
833 -
834 - if (force_new && s->s_authorizer) {
835 - ac->ops->destroy_authorizer(ac, s->s_authorizer);
836 - s->s_authorizer = NULL;
837 - }
838 - if (s->s_authorizer == NULL) {
839 - if (ac->ops->create_authorizer) {
840 - ret = ac->ops->create_authorizer(
841 - ac, CEPH_ENTITY_TYPE_MDS,
842 - &s->s_authorizer,
843 - &s->s_authorizer_buf,
844 - &s->s_authorizer_buf_len,
845 - &s->s_authorizer_reply_buf,
846 - &s->s_authorizer_reply_buf_len);
847 - if (ret)
848 - return ret;
849 - }
850 - }
851 + struct ceph_auth_handshake *auth = &s->s_auth;
852
853 + if (force_new && auth->authorizer) {
854 + if (ac->ops && ac->ops->destroy_authorizer)
855 + ac->ops->destroy_authorizer(ac, auth->authorizer);
856 + auth->authorizer = NULL;
857 + }
858 + if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) {
859 + int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
860 + auth);
861 + if (ret)
862 + return ERR_PTR(ret);
863 + }
864 *proto = ac->protocol;
865 - *buf = s->s_authorizer_buf;
866 - *len = s->s_authorizer_buf_len;
867 - *reply_buf = s->s_authorizer_reply_buf;
868 - *reply_len = s->s_authorizer_reply_buf_len;
869 - return 0;
870 +
871 + return auth;
872 }
873
874
875 @@ -3437,7 +3430,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
876 struct ceph_mds_client *mdsc = s->s_mdsc;
877 struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
878
879 - return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len);
880 + return ac->ops->verify_authorizer_reply(ac, s->s_auth.authorizer, len);
881 }
882
883 static int invalidate_authorizer(struct ceph_connection *con)
884 diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
885 index 8c7c04e..dd26846 100644
886 --- a/fs/ceph/mds_client.h
887 +++ b/fs/ceph/mds_client.h
888 @@ -11,6 +11,7 @@
889 #include <linux/ceph/types.h>
890 #include <linux/ceph/messenger.h>
891 #include <linux/ceph/mdsmap.h>
892 +#include <linux/ceph/auth.h>
893
894 /*
895 * Some lock dependencies:
896 @@ -113,9 +114,7 @@ struct ceph_mds_session {
897
898 struct ceph_connection s_con;
899
900 - struct ceph_authorizer *s_authorizer;
901 - void *s_authorizer_buf, *s_authorizer_reply_buf;
902 - size_t s_authorizer_buf_len, s_authorizer_reply_buf_len;
903 + struct ceph_auth_handshake s_auth;
904
905 /* protected by s_gen_ttl_lock */
906 spinlock_t s_gen_ttl_lock;
907 diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
908 index 3cc1b25..6ccf176 100644
909 --- a/fs/cifs/cifsacl.c
910 +++ b/fs/cifs/cifsacl.c
911 @@ -225,6 +225,13 @@ sid_to_str(struct cifs_sid *sidptr, char *sidstr)
912 }
913
914 static void
915 +cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
916 +{
917 + memcpy(dst, src, sizeof(*dst));
918 + dst->num_subauth = min_t(u8, src->num_subauth, NUM_SUBAUTHS);
919 +}
920 +
921 +static void
922 id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
923 struct cifs_sid_id **psidid, char *typestr)
924 {
925 @@ -248,7 +255,7 @@ id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
926 }
927 }
928
929 - memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
930 + cifs_copy_sid(&(*psidid)->sid, sidptr);
931 (*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
932 (*psidid)->refcount = 0;
933
934 @@ -354,7 +361,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
935 * any fields of the node after a reference is put .
936 */
937 if (test_bit(SID_ID_MAPPED, &psidid->state)) {
938 - memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
939 + cifs_copy_sid(ssid, &psidid->sid);
940 psidid->time = jiffies; /* update ts for accessing */
941 goto id_sid_out;
942 }
943 @@ -370,14 +377,14 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
944 if (IS_ERR(sidkey)) {
945 rc = -EINVAL;
946 cFYI(1, "%s: Can't map and id to a SID", __func__);
947 + } else if (sidkey->datalen < sizeof(struct cifs_sid)) {
948 + rc = -EIO;
949 + cFYI(1, "%s: Downcall contained malformed key "
950 + "(datalen=%hu)", __func__, sidkey->datalen);
951 } else {
952 lsid = (struct cifs_sid *)sidkey->payload.data;
953 - memcpy(&psidid->sid, lsid,
954 - sidkey->datalen < sizeof(struct cifs_sid) ?
955 - sidkey->datalen : sizeof(struct cifs_sid));
956 - memcpy(ssid, &psidid->sid,
957 - sidkey->datalen < sizeof(struct cifs_sid) ?
958 - sidkey->datalen : sizeof(struct cifs_sid));
959 + cifs_copy_sid(&psidid->sid, lsid);
960 + cifs_copy_sid(ssid, &psidid->sid);
961 set_bit(SID_ID_MAPPED, &psidid->state);
962 key_put(sidkey);
963 kfree(psidid->sidstr);
964 @@ -396,7 +403,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
965 return rc;
966 }
967 if (test_bit(SID_ID_MAPPED, &psidid->state))
968 - memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
969 + cifs_copy_sid(ssid, &psidid->sid);
970 else
971 rc = -EINVAL;
972 }
973 @@ -675,8 +682,6 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
974 static void copy_sec_desc(const struct cifs_ntsd *pntsd,
975 struct cifs_ntsd *pnntsd, __u32 sidsoffset)
976 {
977 - int i;
978 -
979 struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
980 struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;
981
982 @@ -692,26 +697,14 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
983 owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
984 le32_to_cpu(pntsd->osidoffset));
985 nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset);
986 -
987 - nowner_sid_ptr->revision = owner_sid_ptr->revision;
988 - nowner_sid_ptr->num_subauth = owner_sid_ptr->num_subauth;
989 - for (i = 0; i < 6; i++)
990 - nowner_sid_ptr->authority[i] = owner_sid_ptr->authority[i];
991 - for (i = 0; i < 5; i++)
992 - nowner_sid_ptr->sub_auth[i] = owner_sid_ptr->sub_auth[i];
993 + cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr);
994
995 /* copy group sid */
996 group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
997 le32_to_cpu(pntsd->gsidoffset));
998 ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset +
999 sizeof(struct cifs_sid));
1000 -
1001 - ngroup_sid_ptr->revision = group_sid_ptr->revision;
1002 - ngroup_sid_ptr->num_subauth = group_sid_ptr->num_subauth;
1003 - for (i = 0; i < 6; i++)
1004 - ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i];
1005 - for (i = 0; i < 5; i++)
1006 - ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i];
1007 + cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr);
1008
1009 return;
1010 }
1011 @@ -1120,8 +1113,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
1012 kfree(nowner_sid_ptr);
1013 return rc;
1014 }
1015 - memcpy(owner_sid_ptr, nowner_sid_ptr,
1016 - sizeof(struct cifs_sid));
1017 + cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr);
1018 kfree(nowner_sid_ptr);
1019 *aclflag = CIFS_ACL_OWNER;
1020 }
1021 @@ -1139,8 +1131,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
1022 kfree(ngroup_sid_ptr);
1023 return rc;
1024 }
1025 - memcpy(group_sid_ptr, ngroup_sid_ptr,
1026 - sizeof(struct cifs_sid));
1027 + cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr);
1028 kfree(ngroup_sid_ptr);
1029 *aclflag = CIFS_ACL_GROUP;
1030 }
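
The cifs changes funnel every SID copy through one helper, cifs_copy_sid(), which clamps num_subauth to what the structure can actually hold, replacing several memcpy/loop variants that trusted a server-supplied count. A standalone sketch of the idea (struct demo_sid and NUM_SUBAUTHS_DEMO are stand-ins for the cifs definitions):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #define NUM_SUBAUTHS_DEMO 5          /* cap assumed for the demo */

    struct demo_sid {
        uint8_t  revision;
        uint8_t  num_subauth;
        uint8_t  authority[6];
        uint32_t sub_auth[NUM_SUBAUTHS_DEMO];
    };

    /* One memcpy plus a clamp: a malformed num_subauth from the server
     * can never claim more entries than the structure holds. */
    static void copy_sid_demo(struct demo_sid *dst, const struct demo_sid *src)
    {
        memcpy(dst, src, sizeof(*dst));
        if (dst->num_subauth > NUM_SUBAUTHS_DEMO)
            dst->num_subauth = NUM_SUBAUTHS_DEMO;
    }

    int main(void)
    {
        struct demo_sid bogus = { .revision = 1, .num_subauth = 200 };
        struct demo_sid out;
        copy_sid_demo(&out, &bogus);
        assert(out.num_subauth == NUM_SUBAUTHS_DEMO);
        return 0;
    }
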
1031 diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
1032 index 0f04d2e..240832e 100644
1033 --- a/fs/ecryptfs/main.c
1034 +++ b/fs/ecryptfs/main.c
1035 @@ -280,6 +280,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
1036 char *fnek_src;
1037 char *cipher_key_bytes_src;
1038 char *fn_cipher_key_bytes_src;
1039 + u8 cipher_code;
1040
1041 *check_ruid = 0;
1042
1043 @@ -421,6 +422,18 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
1044 && !fn_cipher_key_bytes_set)
1045 mount_crypt_stat->global_default_fn_cipher_key_bytes =
1046 mount_crypt_stat->global_default_cipher_key_size;
1047 +
1048 + cipher_code = ecryptfs_code_for_cipher_string(
1049 + mount_crypt_stat->global_default_cipher_name,
1050 + mount_crypt_stat->global_default_cipher_key_size);
1051 + if (!cipher_code) {
1052 + ecryptfs_printk(KERN_ERR,
1053 + "eCryptfs doesn't support cipher: %s",
1054 + mount_crypt_stat->global_default_cipher_name);
1055 + rc = -EINVAL;
1056 + goto out;
1057 + }
1058 +
1059 mutex_lock(&key_tfm_list_mutex);
1060 if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name,
1061 NULL)) {
1062 @@ -506,7 +519,6 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
1063 goto out;
1064 }
1065
1066 - s->s_flags = flags;
1067 rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
1068 if (rc)
1069 goto out1;
1070 @@ -542,6 +554,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
1071 }
1072
1073 ecryptfs_set_superblock_lower(s, path.dentry->d_sb);
1074 +
1075 + /**
1076 + * Set the POSIX ACL flag based on whether they're enabled in the lower
1077 + * mount. Force a read-only eCryptfs mount if the lower mount is ro.
1078 + * Allow a ro eCryptfs mount even when the lower mount is rw.
1079 + */
1080 + s->s_flags = flags & ~MS_POSIXACL;
1081 + s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL);
1082 +
1083 s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
1084 s->s_blocksize = path.dentry->d_sb->s_blocksize;
1085 s->s_magic = ECRYPTFS_SUPER_MAGIC;
1086 diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
1087 index 5e80180..8955e36 100644
1088 --- a/fs/nfs/nfs4proc.c
1089 +++ b/fs/nfs/nfs4proc.c
1090 @@ -307,8 +307,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
1091 dprintk("%s ERROR: %d Reset session\n", __func__,
1092 errorcode);
1093 nfs4_schedule_session_recovery(clp->cl_session);
1094 - exception->retry = 1;
1095 - break;
1096 + goto wait_on_recovery;
1097 #endif /* defined(CONFIG_NFS_V4_1) */
1098 case -NFS4ERR_FILE_OPEN:
1099 if (exception->timeout > HZ) {
1100 diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
1101 index f35794b..a506360 100644
1102 --- a/fs/notify/fanotify/fanotify.c
1103 +++ b/fs/notify/fanotify/fanotify.c
1104 @@ -21,6 +21,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
1105 if ((old->path.mnt == new->path.mnt) &&
1106 (old->path.dentry == new->path.dentry))
1107 return true;
1108 + break;
1109 case (FSNOTIFY_EVENT_NONE):
1110 return true;
1111 default:
1112 diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
1113 index f99c1b4..c11db51 100644
1114 --- a/fs/reiserfs/inode.c
1115 +++ b/fs/reiserfs/inode.c
1116 @@ -1788,8 +1788,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1117
1118 BUG_ON(!th->t_trans_id);
1119
1120 - dquot_initialize(inode);
1121 + reiserfs_write_unlock(inode->i_sb);
1122 err = dquot_alloc_inode(inode);
1123 + reiserfs_write_lock(inode->i_sb);
1124 if (err)
1125 goto out_end_trans;
1126 if (!dir->i_nlink) {
1127 @@ -1985,8 +1986,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1128
1129 out_end_trans:
1130 journal_end(th, th->t_super, th->t_blocks_allocated);
1131 + reiserfs_write_unlock(inode->i_sb);
1132 /* Drop can be outside and it needs more credits so it's better to have it outside */
1133 dquot_drop(inode);
1134 + reiserfs_write_lock(inode->i_sb);
1135 inode->i_flags |= S_NOQUOTA;
1136 make_bad_inode(inode);
1137
1138 @@ -3109,10 +3112,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
1139 /* must be turned off for recursive notify_change calls */
1140 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
1141
1142 - depth = reiserfs_write_lock_once(inode->i_sb);
1143 if (is_quota_modification(inode, attr))
1144 dquot_initialize(inode);
1145 -
1146 + depth = reiserfs_write_lock_once(inode->i_sb);
1147 if (attr->ia_valid & ATTR_SIZE) {
1148 /* version 2 items will be caught by the s_maxbytes check
1149 ** done for us in vmtruncate
1150 @@ -3176,7 +3178,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
1151 error = journal_begin(&th, inode->i_sb, jbegin_count);
1152 if (error)
1153 goto out;
1154 + reiserfs_write_unlock_once(inode->i_sb, depth);
1155 error = dquot_transfer(inode, attr);
1156 + depth = reiserfs_write_lock_once(inode->i_sb);
1157 if (error) {
1158 journal_end(&th, inode->i_sb, jbegin_count);
1159 goto out;
1160 diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
1161 index f8afa4b..2f40a4c 100644
1162 --- a/fs/reiserfs/stree.c
1163 +++ b/fs/reiserfs/stree.c
1164 @@ -1968,7 +1968,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
1165 key2type(&(key->on_disk_key)));
1166 #endif
1167
1168 + reiserfs_write_unlock(inode->i_sb);
1169 retval = dquot_alloc_space_nodirty(inode, pasted_size);
1170 + reiserfs_write_lock(inode->i_sb);
1171 if (retval) {
1172 pathrelse(search_path);
1173 return retval;
1174 @@ -2061,9 +2063,11 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
1175 "reiserquota insert_item(): allocating %u id=%u type=%c",
1176 quota_bytes, inode->i_uid, head2type(ih));
1177 #endif
1178 + reiserfs_write_unlock(inode->i_sb);
1179 /* We can't dirty inode here. It would be immediately written but
1180 * appropriate stat item isn't inserted yet... */
1181 retval = dquot_alloc_space_nodirty(inode, quota_bytes);
1182 + reiserfs_write_lock(inode->i_sb);
1183 if (retval) {
1184 pathrelse(path);
1185 return retval;
1186 diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
1187 index 8b7616e..8169be9 100644
1188 --- a/fs/reiserfs/super.c
1189 +++ b/fs/reiserfs/super.c
1190 @@ -256,7 +256,9 @@ static int finish_unfinished(struct super_block *s)
1191 retval = remove_save_link_only(s, &save_link_key, 0);
1192 continue;
1193 }
1194 + reiserfs_write_unlock(s);
1195 dquot_initialize(inode);
1196 + reiserfs_write_lock(s);
1197
1198 if (truncate && S_ISDIR(inode->i_mode)) {
1199 /* We got a truncate request for a dir which is impossible.
1200 @@ -1292,7 +1294,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1201 kfree(qf_names[i]);
1202 #endif
1203 err = -EINVAL;
1204 - goto out_err;
1205 + goto out_unlock;
1206 }
1207 #ifdef CONFIG_QUOTA
1208 handle_quota_files(s, qf_names, &qfmt);
1209 @@ -1336,7 +1338,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1210 if (blocks) {
1211 err = reiserfs_resize(s, blocks);
1212 if (err != 0)
1213 - goto out_err;
1214 + goto out_unlock;
1215 }
1216
1217 if (*mount_flags & MS_RDONLY) {
1218 @@ -1346,9 +1348,15 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1219 /* it is read-only already */
1220 goto out_ok;
1221
1222 + /*
1223 + * Drop write lock. Quota will retake it when needed and lock
1224 + * ordering requires calling dquot_suspend() without it.
1225 + */
1226 + reiserfs_write_unlock(s);
1227 err = dquot_suspend(s, -1);
1228 if (err < 0)
1229 goto out_err;
1230 + reiserfs_write_lock(s);
1231
1232 /* try to remount file system with read-only permissions */
1233 if (sb_umount_state(rs) == REISERFS_VALID_FS
1234 @@ -1358,7 +1366,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1235
1236 err = journal_begin(&th, s, 10);
1237 if (err)
1238 - goto out_err;
1239 + goto out_unlock;
1240
1241 /* Mounting a rw partition read-only. */
1242 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
1243 @@ -1373,7 +1381,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1244
1245 if (reiserfs_is_journal_aborted(journal)) {
1246 err = journal->j_errno;
1247 - goto out_err;
1248 + goto out_unlock;
1249 }
1250
1251 handle_data_mode(s, mount_options);
1252 @@ -1382,7 +1390,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1253 s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */
1254 err = journal_begin(&th, s, 10);
1255 if (err)
1256 - goto out_err;
1257 + goto out_unlock;
1258
1259 /* Mount a partition which is read-only, read-write */
1260 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
1261 @@ -1399,11 +1407,17 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1262 SB_JOURNAL(s)->j_must_wait = 1;
1263 err = journal_end(&th, s, 10);
1264 if (err)
1265 - goto out_err;
1266 + goto out_unlock;
1267 s->s_dirt = 0;
1268
1269 if (!(*mount_flags & MS_RDONLY)) {
1270 + /*
1271 + * Drop write lock. Quota will retake it when needed and lock
1272 + * ordering requires calling dquot_resume() without it.
1273 + */
1274 + reiserfs_write_unlock(s);
1275 dquot_resume(s, -1);
1276 + reiserfs_write_lock(s);
1277 finish_unfinished(s);
1278 reiserfs_xattr_init(s, *mount_flags);
1279 }
1280 @@ -1413,9 +1427,10 @@ out_ok:
1281 reiserfs_write_unlock(s);
1282 return 0;
1283
1284 +out_unlock:
1285 + reiserfs_write_unlock(s);
1286 out_err:
1287 kfree(new_opts);
1288 - reiserfs_write_unlock(s);
1289 return err;
1290 }
1291
1292 @@ -2049,13 +2064,15 @@ static int reiserfs_write_dquot(struct dquot *dquot)
1293 REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
1294 if (ret)
1295 goto out;
1296 + reiserfs_write_unlock(dquot->dq_sb);
1297 ret = dquot_commit(dquot);
1298 + reiserfs_write_lock(dquot->dq_sb);
1299 err =
1300 journal_end(&th, dquot->dq_sb,
1301 REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
1302 if (!ret && err)
1303 ret = err;
1304 - out:
1305 +out:
1306 reiserfs_write_unlock(dquot->dq_sb);
1307 return ret;
1308 }
1309 @@ -2071,13 +2088,15 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
1310 REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
1311 if (ret)
1312 goto out;
1313 + reiserfs_write_unlock(dquot->dq_sb);
1314 ret = dquot_acquire(dquot);
1315 + reiserfs_write_lock(dquot->dq_sb);
1316 err =
1317 journal_end(&th, dquot->dq_sb,
1318 REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
1319 if (!ret && err)
1320 ret = err;
1321 - out:
1322 +out:
1323 reiserfs_write_unlock(dquot->dq_sb);
1324 return ret;
1325 }
1326 @@ -2091,19 +2110,21 @@ static int reiserfs_release_dquot(struct dquot *dquot)
1327 ret =
1328 journal_begin(&th, dquot->dq_sb,
1329 REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
1330 + reiserfs_write_unlock(dquot->dq_sb);
1331 if (ret) {
1332 /* Release dquot anyway to avoid endless cycle in dqput() */
1333 dquot_release(dquot);
1334 goto out;
1335 }
1336 ret = dquot_release(dquot);
1337 + reiserfs_write_lock(dquot->dq_sb);
1338 err =
1339 journal_end(&th, dquot->dq_sb,
1340 REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
1341 if (!ret && err)
1342 ret = err;
1343 - out:
1344 reiserfs_write_unlock(dquot->dq_sb);
1345 +out:
1346 return ret;
1347 }
1348
1349 @@ -2128,11 +2149,13 @@ static int reiserfs_write_info(struct super_block *sb, int type)
1350 ret = journal_begin(&th, sb, 2);
1351 if (ret)
1352 goto out;
1353 + reiserfs_write_unlock(sb);
1354 ret = dquot_commit_info(sb, type);
1355 + reiserfs_write_lock(sb);
1356 err = journal_end(&th, sb, 2);
1357 if (!ret && err)
1358 ret = err;
1359 - out:
1360 +out:
1361 reiserfs_write_unlock(sb);
1362 return ret;
1363 }
1364 @@ -2157,8 +2180,11 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
1365 struct reiserfs_transaction_handle th;
1366 int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA;
1367
1368 - if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt)))
1369 - return -EINVAL;
1370 + reiserfs_write_lock(sb);
1371 + if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) {
1372 + err = -EINVAL;
1373 + goto out;
1374 + }
1375
1376 /* Quotafile not on the same filesystem? */
1377 if (path->dentry->d_sb != sb) {
1378 @@ -2200,8 +2226,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
1379 if (err)
1380 goto out;
1381 }
1382 - err = dquot_quota_on(sb, type, format_id, path);
1383 + reiserfs_write_unlock(sb);
1384 + return dquot_quota_on(sb, type, format_id, path);
1385 out:
1386 + reiserfs_write_unlock(sb);
1387 return err;
1388 }
1389
1390 @@ -2275,7 +2303,9 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
1391 tocopy = sb->s_blocksize - offset < towrite ?
1392 sb->s_blocksize - offset : towrite;
1393 tmp_bh.b_state = 0;
1394 + reiserfs_write_lock(sb);
1395 err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE);
1396 + reiserfs_write_unlock(sb);
1397 if (err)
1398 goto out;
1399 if (offset || tocopy != sb->s_blocksize)
1400 @@ -2291,10 +2321,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
1401 flush_dcache_page(bh->b_page);
1402 set_buffer_uptodate(bh);
1403 unlock_buffer(bh);
1404 + reiserfs_write_lock(sb);
1405 reiserfs_prepare_for_journal(sb, bh, 1);
1406 journal_mark_dirty(current->journal_info, sb, bh);
1407 if (!journal_quota)
1408 reiserfs_add_ordered_list(inode, bh);
1409 + reiserfs_write_unlock(sb);
1410 brelse(bh);
1411 offset = 0;
1412 towrite -= tocopy;
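
All of the reiserfs hunks apply one recipe: release the filesystem's global write lock before calling into the quota layer (dquot_*), which takes its own locks, then retake it for the journal bookkeeping, so the two lock classes always nest in a single order and the ordering inversions go away. A generic pthread sketch of that drop-before-crossing-layers pattern (all names are illustrative):

    #include <pthread.h>

    static pthread_mutex_t fs_lock    = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t quota_lock = PTHREAD_MUTEX_INITIALIZER;

    static void quota_op(void)
    {
        pthread_mutex_lock(&quota_lock);
        /* ... quota work that may itself want fs_lock ... */
        pthread_mutex_unlock(&quota_lock);
    }

    /* Never hold fs_lock across quota_op(), so fs_lock -> quota_lock
     * and quota_lock -> fs_lock can never both occur. */
    static void fs_op_with_quota(void)
    {
        pthread_mutex_lock(&fs_lock);
        /* ... journalled filesystem work ... */
        pthread_mutex_unlock(&fs_lock);  /* drop before crossing layers */
        quota_op();
        pthread_mutex_lock(&fs_lock);    /* retake for the journal end */
        /* ... finish the transaction ... */
        pthread_mutex_unlock(&fs_lock);
    }

    int main(void) { fs_op_with_quota(); return 0; }
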
1413 diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
1414 index 2559d17..5dc48ca 100644
1415 --- a/fs/ubifs/find.c
1416 +++ b/fs/ubifs/find.c
1417 @@ -681,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
1418 if (!lprops) {
1419 lprops = ubifs_fast_find_freeable(c);
1420 if (!lprops) {
1421 - ubifs_assert(c->freeable_cnt == 0);
1422 - if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
1423 + /*
1424 + * The first condition means the following: go scan the
1425 + * LPT if there are uncategorized lprops, which means
1426 + * there may be freeable LEBs there (UBIFS does not
1427 + * store the information about freeable LEBs in the
1428 + * master node).
1429 + */
1430 + if (c->in_a_category_cnt != c->main_lebs ||
1431 + c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
1432 + ubifs_assert(c->freeable_cnt == 0);
1433 lprops = scan_for_leb_for_idx(c);
1434 if (IS_ERR(lprops)) {
1435 err = PTR_ERR(lprops);
1436 diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
1437 index f8a181e..ea9d491 100644
1438 --- a/fs/ubifs/lprops.c
1439 +++ b/fs/ubifs/lprops.c
1440 @@ -300,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
1441 default:
1442 ubifs_assert(0);
1443 }
1444 +
1445 lprops->flags &= ~LPROPS_CAT_MASK;
1446 lprops->flags |= cat;
1447 + c->in_a_category_cnt += 1;
1448 + ubifs_assert(c->in_a_category_cnt <= c->main_lebs);
1449 }
1450
1451 /**
1452 @@ -334,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c,
1453 default:
1454 ubifs_assert(0);
1455 }
1456 +
1457 + c->in_a_category_cnt -= 1;
1458 + ubifs_assert(c->in_a_category_cnt >= 0);
1459 }
1460
1461 /**
1462 diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
1463 index 93d59ac..4971cb2 100644
1464 --- a/fs/ubifs/ubifs.h
1465 +++ b/fs/ubifs/ubifs.h
1466 @@ -1184,6 +1184,8 @@ struct ubifs_debug_info;
1467 * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
1468 * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
1469 * @freeable_cnt: number of freeable LEBs in @freeable_list
1470 + * @in_a_category_cnt: count of lprops which are in a certain category, which
1471 + * basically meants that they were loaded from the flash
1472 *
1473 * @ltab_lnum: LEB number of LPT's own lprops table
1474 * @ltab_offs: offset of LPT's own lprops table
1475 @@ -1413,6 +1415,7 @@ struct ubifs_info {
1476 struct list_head freeable_list;
1477 struct list_head frdi_idx_list;
1478 int freeable_cnt;
1479 + int in_a_category_cnt;
1480
1481 int ltab_lnum;
1482 int ltab_offs;
1483 diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
1484 index 6819b51..bb76128 100644
1485 --- a/fs/xfs/xfs_buf.c
1486 +++ b/fs/xfs/xfs_buf.c
1487 @@ -1165,9 +1165,14 @@ xfs_buf_bio_end_io(
1488 {
1489 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
1490
1491 - xfs_buf_ioerror(bp, -error);
1492 + /*
1493 + * don't overwrite existing errors - otherwise we can lose errors on
1494 + * buffers that require multiple bios to complete.
1495 + */
1496 + if (!bp->b_error)
1497 + xfs_buf_ioerror(bp, -error);
1498
1499 - if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1500 + if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1501 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
1502
1503 _xfs_buf_ioend(bp, 1);
1504 @@ -1243,6 +1248,11 @@ next_chunk:
1505 if (size)
1506 goto next_chunk;
1507 } else {
1508 + /*
1509 + * This is guaranteed not to be the last io reference count
1510 + * because the caller (xfs_buf_iorequest) holds a count itself.
1511 + */
1512 + atomic_dec(&bp->b_io_remaining);
1513 xfs_buf_ioerror(bp, EIO);
1514 bio_put(bio);
1515 }
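
When one logical I/O is split across several bios, a later completion must
not clobber an error recorded by an earlier one, so the hunk above makes the
first error sticky and keys the vmap invalidation off b_error rather than
the per-bio error. A minimal sketch of the first-error-wins completion
pattern (illustrative names, not the XFS API):

    #include <stdio.h>

    struct buf { int b_error; int io_remaining; };

    static void bio_end_io(struct buf *bp, int error)
    {
            /* keep the first error; later bios must not overwrite it */
            if (!bp->b_error)
                    bp->b_error = error;
            bp->io_remaining--;
    }

    int main(void)
    {
            struct buf bp = { 0, 3 };
            bio_end_io(&bp, 0);
            bio_end_io(&bp, -5);    /* -EIO recorded            */
            bio_end_io(&bp, 0);     /* does not clear the error */
            printf("b_error=%d\n", bp.b_error); /* prints -5 */
            return 0;
    }
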
1516 diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
1517 index aa13392..d4080f3 100644
1518 --- a/include/linux/ceph/auth.h
1519 +++ b/include/linux/ceph/auth.h
1520 @@ -14,6 +14,14 @@
1521 struct ceph_auth_client;
1522 struct ceph_authorizer;
1523
1524 +struct ceph_auth_handshake {
1525 + struct ceph_authorizer *authorizer;
1526 + void *authorizer_buf;
1527 + size_t authorizer_buf_len;
1528 + void *authorizer_reply_buf;
1529 + size_t authorizer_reply_buf_len;
1530 +};
1531 +
1532 struct ceph_auth_client_ops {
1533 const char *name;
1534
1535 @@ -43,9 +51,7 @@ struct ceph_auth_client_ops {
1536 * the response to authenticate the service.
1537 */
1538 int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type,
1539 - struct ceph_authorizer **a,
1540 - void **buf, size_t *len,
1541 - void **reply_buf, size_t *reply_len);
1542 + struct ceph_auth_handshake *auth);
1543 int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
1544 struct ceph_authorizer *a, size_t len);
1545 void (*destroy_authorizer)(struct ceph_auth_client *ac,
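
Folding the five authorizer out-parameters into a single struct
ceph_auth_handshake lets every create_authorizer() implementation fill one
object instead of five pointers, and lets callers keep the whole handshake
together. A user-space sketch of the refactor (buffer contents are
hypothetical):

    #include <stddef.h>
    #include <string.h>

    struct auth_handshake {
            void   *buf;            /* authorizer payload          */
            size_t  buf_len;
            void   *reply_buf;      /* where the peer's reply goes */
            size_t  reply_buf_len;
    };

    static char payload[16];
    static char reply[16];

    /* one out-parameter instead of five */
    static int create_authorizer(struct auth_handshake *auth)
    {
            memset(payload, 0, sizeof(payload));
            auth->buf = payload;
            auth->buf_len = sizeof(payload);
            auth->reply_buf = reply;
            auth->reply_buf_len = sizeof(reply);
            return 0;
    }

    int main(void)
    {
            struct auth_handshake a;
            return create_authorizer(&a);
    }
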
1546 diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
1547 index e71d683..98ec36a 100644
1548 --- a/include/linux/ceph/libceph.h
1549 +++ b/include/linux/ceph/libceph.h
1550 @@ -132,7 +132,7 @@ struct ceph_client {
1551 u32 supported_features;
1552 u32 required_features;
1553
1554 - struct ceph_messenger *msgr; /* messenger instance */
1555 + struct ceph_messenger msgr; /* messenger instance */
1556 struct ceph_mon_client monc;
1557 struct ceph_osd_client osdc;
1558
1559 diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
1560 index 3bff047..189ae06 100644
1561 --- a/include/linux/ceph/messenger.h
1562 +++ b/include/linux/ceph/messenger.h
1563 @@ -25,15 +25,12 @@ struct ceph_connection_operations {
1564 void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m);
1565
1566 /* authorize an outgoing connection */
1567 - int (*get_authorizer) (struct ceph_connection *con,
1568 - void **buf, int *len, int *proto,
1569 - void **reply_buf, int *reply_len, int force_new);
1570 + struct ceph_auth_handshake *(*get_authorizer) (
1571 + struct ceph_connection *con,
1572 + int *proto, int force_new);
1573 int (*verify_authorizer_reply) (struct ceph_connection *con, int len);
1574 int (*invalidate_authorizer)(struct ceph_connection *con);
1575
1576 - /* protocol version mismatch */
1577 - void (*bad_proto) (struct ceph_connection *con);
1578 -
1579 /* there was some error on the socket (disconnect, whatever) */
1580 void (*fault) (struct ceph_connection *con);
1581
1582 @@ -53,6 +50,7 @@ struct ceph_messenger {
1583 struct ceph_entity_inst inst; /* my name+address */
1584 struct ceph_entity_addr my_enc_addr;
1585
1586 + atomic_t stopping;
1587 bool nocrc;
1588
1589 /*
1590 @@ -80,7 +78,10 @@ struct ceph_msg {
1591 unsigned nr_pages; /* size of page array */
1592 unsigned page_alignment; /* io offset in first page */
1593 struct ceph_pagelist *pagelist; /* instead of pages */
1594 +
1595 + struct ceph_connection *con;
1596 struct list_head list_head;
1597 +
1598 struct kref kref;
1599 struct bio *bio; /* instead of pages/pagelist */
1600 struct bio *bio_iter; /* bio iterator */
1601 @@ -106,23 +107,6 @@ struct ceph_msg_pos {
1602 #define MAX_DELAY_INTERVAL (5 * 60 * HZ)
1603
1604 /*
1605 - * ceph_connection state bit flags
1606 - */
1607 -#define LOSSYTX 0 /* we can close channel or drop messages on errors */
1608 -#define CONNECTING 1
1609 -#define NEGOTIATING 2
1610 -#define KEEPALIVE_PENDING 3
1611 -#define WRITE_PENDING 4 /* we have data ready to send */
1612 -#define STANDBY 8 /* no outgoing messages, socket closed. we keep
1613 - * the ceph_connection around to maintain shared
1614 - * state with the peer. */
1615 -#define CLOSED 10 /* we've closed the connection */
1616 -#define SOCK_CLOSED 11 /* socket state changed to closed */
1617 -#define OPENING 13 /* open connection w/ (possibly new) peer */
1618 -#define DEAD 14 /* dead, about to kfree */
1619 -#define BACKOFF 15
1620 -
1621 -/*
1622 * A single connection with another host.
1623 *
1624 * We maintain a queue of outgoing messages, and some session state to
1625 @@ -131,18 +115,22 @@ struct ceph_msg_pos {
1626 */
1627 struct ceph_connection {
1628 void *private;
1629 - atomic_t nref;
1630
1631 const struct ceph_connection_operations *ops;
1632
1633 struct ceph_messenger *msgr;
1634 +
1635 + atomic_t sock_state;
1636 struct socket *sock;
1637 - unsigned long state; /* connection state (see flags above) */
1638 + struct ceph_entity_addr peer_addr; /* peer address */
1639 + struct ceph_entity_addr peer_addr_for_me;
1640 +
1641 + unsigned long flags;
1642 + unsigned long state;
1643 const char *error_msg; /* error message, if any */
1644
1645 - struct ceph_entity_addr peer_addr; /* peer address */
1646 struct ceph_entity_name peer_name; /* peer name */
1647 - struct ceph_entity_addr peer_addr_for_me;
1648 +
1649 unsigned peer_features;
1650 u32 connect_seq; /* identify the most recent connection
1651 attempt for this connection, client */
1652 @@ -163,16 +151,8 @@ struct ceph_connection {
1653
1654 /* connection negotiation temps */
1655 char in_banner[CEPH_BANNER_MAX_LEN];
1656 - union {
1657 - struct { /* outgoing connection */
1658 - struct ceph_msg_connect out_connect;
1659 - struct ceph_msg_connect_reply in_reply;
1660 - };
1661 - struct { /* incoming */
1662 - struct ceph_msg_connect in_connect;
1663 - struct ceph_msg_connect_reply out_reply;
1664 - };
1665 - };
1666 + struct ceph_msg_connect out_connect;
1667 + struct ceph_msg_connect_reply in_reply;
1668 struct ceph_entity_addr actual_peer_addr;
1669
1670 /* message out temps */
1671 @@ -215,24 +195,26 @@ extern int ceph_msgr_init(void);
1672 extern void ceph_msgr_exit(void);
1673 extern void ceph_msgr_flush(void);
1674
1675 -extern struct ceph_messenger *ceph_messenger_create(
1676 - struct ceph_entity_addr *myaddr,
1677 - u32 features, u32 required);
1678 -extern void ceph_messenger_destroy(struct ceph_messenger *);
1679 +extern void ceph_messenger_init(struct ceph_messenger *msgr,
1680 + struct ceph_entity_addr *myaddr,
1681 + u32 supported_features,
1682 + u32 required_features,
1683 + bool nocrc);
1684
1685 -extern void ceph_con_init(struct ceph_messenger *msgr,
1686 - struct ceph_connection *con);
1687 +extern void ceph_con_init(struct ceph_connection *con, void *private,
1688 + const struct ceph_connection_operations *ops,
1689 + struct ceph_messenger *msgr);
1690 extern void ceph_con_open(struct ceph_connection *con,
1691 + __u8 entity_type, __u64 entity_num,
1692 struct ceph_entity_addr *addr);
1693 extern bool ceph_con_opened(struct ceph_connection *con);
1694 extern void ceph_con_close(struct ceph_connection *con);
1695 extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
1696 -extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
1697 -extern void ceph_con_revoke_message(struct ceph_connection *con,
1698 - struct ceph_msg *msg);
1699 +
1700 +extern void ceph_msg_revoke(struct ceph_msg *msg);
1701 +extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
1702 +
1703 extern void ceph_con_keepalive(struct ceph_connection *con);
1704 -extern struct ceph_connection *ceph_con_get(struct ceph_connection *con);
1705 -extern void ceph_con_put(struct ceph_connection *con);
1706
1707 extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
1708 bool can_fail);
1709 diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
1710 index 545f859..2113e38 100644
1711 --- a/include/linux/ceph/mon_client.h
1712 +++ b/include/linux/ceph/mon_client.h
1713 @@ -70,7 +70,7 @@ struct ceph_mon_client {
1714 bool hunting;
1715 int cur_mon; /* last monitor i contacted */
1716 unsigned long sub_sent, sub_renew_after;
1717 - struct ceph_connection *con;
1718 + struct ceph_connection con;
1719 bool have_fsid;
1720
1721 /* pending generic requests */
1722 diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h
1723 index a362605..09fa96b 100644
1724 --- a/include/linux/ceph/msgpool.h
1725 +++ b/include/linux/ceph/msgpool.h
1726 @@ -11,10 +11,11 @@
1727 struct ceph_msgpool {
1728 const char *name;
1729 mempool_t *pool;
1730 + int type; /* preallocated message type */
1731 int front_len; /* preallocated payload size */
1732 };
1733
1734 -extern int ceph_msgpool_init(struct ceph_msgpool *pool,
1735 +extern int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
1736 int front_len, int size, bool blocking,
1737 const char *name);
1738 extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
1739 diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
1740 index 7c05ac2..d9b880e 100644
1741 --- a/include/linux/ceph/osd_client.h
1742 +++ b/include/linux/ceph/osd_client.h
1743 @@ -6,9 +6,10 @@
1744 #include <linux/mempool.h>
1745 #include <linux/rbtree.h>
1746
1747 -#include "types.h"
1748 -#include "osdmap.h"
1749 -#include "messenger.h"
1750 +#include <linux/ceph/types.h>
1751 +#include <linux/ceph/osdmap.h>
1752 +#include <linux/ceph/messenger.h>
1753 +#include <linux/ceph/auth.h>
1754
1755 /*
1756 * Maximum object name size
1757 @@ -40,9 +41,7 @@ struct ceph_osd {
1758 struct list_head o_requests;
1759 struct list_head o_linger_requests;
1760 struct list_head o_osd_lru;
1761 - struct ceph_authorizer *o_authorizer;
1762 - void *o_authorizer_buf, *o_authorizer_reply_buf;
1763 - size_t o_authorizer_buf_len, o_authorizer_reply_buf_len;
1764 + struct ceph_auth_handshake o_auth;
1765 unsigned long lru_ttl;
1766 int o_marked_for_keepalive;
1767 struct list_head o_keepalive_item;
1768 @@ -208,7 +207,7 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
1769 extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
1770 struct ceph_msg *msg);
1771
1772 -extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
1773 +extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
1774 struct ceph_file_layout *layout,
1775 u64 snapid,
1776 u64 off, u64 *plen, u64 *bno,
1777 diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
1778 index ba4c205..11db454 100644
1779 --- a/include/linux/ceph/osdmap.h
1780 +++ b/include/linux/ceph/osdmap.h
1781 @@ -111,9 +111,9 @@ extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
1782 extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
1783
1784 /* calculate mapping of a file extent to an object */
1785 -extern void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
1786 - u64 off, u64 *plen,
1787 - u64 *bno, u64 *oxoff, u64 *oxlen);
1788 +extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
1789 + u64 off, u64 *plen,
1790 + u64 *bno, u64 *oxoff, u64 *oxlen);
1791
1792 /* calculate mapping of object to a placement group */
1793 extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
1794 diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
1795 index 97e435b..e7a8c90 100644
1796 --- a/include/linux/crush/crush.h
1797 +++ b/include/linux/crush/crush.h
1798 @@ -168,7 +168,7 @@ struct crush_map {
1799
1800
1801 /* crush.c */
1802 -extern int crush_get_bucket_item_weight(struct crush_bucket *b, int pos);
1803 +extern int crush_get_bucket_item_weight(const struct crush_bucket *b, int pos);
1804 extern void crush_calc_parents(struct crush_map *map);
1805 extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
1806 extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
1807 @@ -177,4 +177,9 @@ extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
1808 extern void crush_destroy_bucket(struct crush_bucket *b);
1809 extern void crush_destroy(struct crush_map *map);
1810
1811 +static inline int crush_calc_tree_node(int i)
1812 +{
1813 + return ((i+1) << 1)-1;
1814 +}
1815 +
1816 #endif
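
crush_calc_tree_node() maps an item's position i in a tree bucket to its
leaf slot in the node_weights array: ((i + 1) << 1) - 1, i.e. the odd
indices 1, 3, 5, ... This is what lets the weight lookup in crush.c index
node_weights directly instead of testing (p & 1). A quick check of the
mapping:

    #include <stdio.h>

    static int crush_calc_tree_node(int i)
    {
            return ((i + 1) << 1) - 1;
    }

    int main(void)
    {
            int i;

            for (i = 0; i < 4; i++)
                    printf("item %d -> node %d\n", i, crush_calc_tree_node(i));
            /* item 0 -> node 1, item 1 -> node 3,
               item 2 -> node 5, item 3 -> node 7 */
            return 0;
    }
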
1817 diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
1818 index c46b99c..9322ab8 100644
1819 --- a/include/linux/crush/mapper.h
1820 +++ b/include/linux/crush/mapper.h
1821 @@ -10,11 +10,11 @@
1822
1823 #include "crush.h"
1824
1825 -extern int crush_find_rule(struct crush_map *map, int pool, int type, int size);
1826 -extern int crush_do_rule(struct crush_map *map,
1827 +extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
1828 +extern int crush_do_rule(const struct crush_map *map,
1829 int ruleno,
1830 int x, int *result, int result_max,
1831 int forcefeed, /* -1 for none */
1832 - __u32 *weights);
1833 + const __u32 *weights);
1834
1835 #endif
1836 diff --git a/kernel/module.c b/kernel/module.c
1837 index 61ea75e..8597217 100644
1838 --- a/kernel/module.c
1839 +++ b/kernel/module.c
1840 @@ -2273,12 +2273,17 @@ static void layout_symtab(struct module *mod, struct load_info *info)
1841 src = (void *)info->hdr + symsect->sh_offset;
1842 nsrc = symsect->sh_size / sizeof(*src);
1843
1844 + /* strtab always starts with a nul, so offset 0 is the empty string. */
1845 + strtab_size = 1;
1846 +
1847 /* Compute total space required for the core symbols' strtab. */
1848 - for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src)
1849 - if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {
1850 - strtab_size += strlen(&info->strtab[src->st_name]) + 1;
1851 + for (ndst = i = 0; i < nsrc; i++) {
1852 + if (i == 0 ||
1853 + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
1854 + strtab_size += strlen(&info->strtab[src[i].st_name])+1;
1855 ndst++;
1856 }
1857 + }
1858
1859 /* Append room for core symbols at end of core part. */
1860 info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
1861 @@ -2312,15 +2317,15 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
1862 mod->core_symtab = dst = mod->module_core + info->symoffs;
1863 mod->core_strtab = s = mod->module_core + info->stroffs;
1864 src = mod->symtab;
1865 - *dst = *src;
1866 *s++ = 0;
1867 - for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
1868 - if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum))
1869 - continue;
1870 -
1871 - dst[ndst] = *src;
1872 - dst[ndst++].st_name = s - mod->core_strtab;
1873 - s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1;
1874 + for (ndst = i = 0; i < mod->num_symtab; i++) {
1875 + if (i == 0 ||
1876 + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
1877 + dst[ndst] = src[i];
1878 + dst[ndst++].st_name = s - mod->core_strtab;
1879 + s += strlcpy(s, &mod->strtab[src[i].st_name],
1880 + KSYM_NAME_LEN) + 1;
1881 + }
1882 }
1883 mod->core_num_syms = ndst;
1884 }
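
Both loops above now treat index 0 (the null symbol) as always kept, and the
string table is sized starting from 1 because offset 0 must hold the NUL of
the empty string. A tiny user-space model of that string-table layout
(symbol names are made up):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            const char *names[] = { "", "init", "exit" }; /* idx 0: null symbol */
            char strtab[64];
            char *s = strtab;
            size_t offs[3];
            int i;

            *s++ = '\0';                    /* offset 0 is the empty string */
            for (i = 0; i < 3; i++) {       /* i == 0 is always kept        */
                    offs[i] = (size_t)(s - strtab);
                    s += strlen(strcpy(s, names[i])) + 1;
            }
            for (i = 0; i < 3; i++)
                    printf("sym %d at %zu -> \"%s\"\n", i, offs[i], strtab + offs[i]);
            return 0;
    }
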
1885 diff --git a/mm/memcontrol.c b/mm/memcontrol.c
1886 index 7685d4a..81c275b 100644
1887 --- a/mm/memcontrol.c
1888 +++ b/mm/memcontrol.c
1889 @@ -1489,17 +1489,26 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
1890 u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
1891 {
1892 u64 limit;
1893 - u64 memsw;
1894
1895 limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
1896 - limit += total_swap_pages << PAGE_SHIFT;
1897
1898 - memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
1899 /*
1900 - * If memsw is finite and limits the amount of swap space available
1901 - * to this memcg, return that limit.
1902 + * Do not consider swap space if we cannot swap due to swappiness
1903 */
1904 - return min(limit, memsw);
1905 + if (mem_cgroup_swappiness(memcg)) {
1906 + u64 memsw;
1907 +
1908 + limit += total_swap_pages << PAGE_SHIFT;
1909 + memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
1910 +
1911 + /*
1912 + * If memsw is finite and limits the amount of swap space
1913 + * available to this memcg, return that limit.
1914 + */
1915 + limit = min(limit, memsw);
1916 + }
1917 +
1918 + return limit;
1919 }
1920
1921 static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
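
With swappiness forced to 0 the group can never swap, so its effective
limit is just the memory limit; only when swapping is possible does the
swap-inclusive bound (capped by the memsw limit) apply. A condensed model
of the new computation (plain integers instead of res_counter reads):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t memcg_get_limit(uint64_t res_limit, uint64_t memsw_limit,
                                    uint64_t total_swap_bytes, int swappiness)
    {
            uint64_t limit = res_limit;

            if (swappiness) {       /* swap only counts if the group may use it */
                    limit += total_swap_bytes;
                    if (memsw_limit < limit)
                            limit = memsw_limit;
            }
            return limit;
    }

    int main(void)
    {
            uint64_t gib = 1ull << 30;

            printf("%llu\n", (unsigned long long)
                   memcg_get_limit(gib, 3 * gib, gib, 0));  /* 1 GiB: no swap  */
            printf("%llu\n", (unsigned long long)
                   memcg_get_limit(gib, 3 * gib, gib, 60)); /* 2 GiB: mem+swap */
            return 0;
    }
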
1922 diff --git a/mm/shmem.c b/mm/shmem.c
1923 index 40383cd..a859b06 100644
1924 --- a/mm/shmem.c
1925 +++ b/mm/shmem.c
1926 @@ -595,7 +595,7 @@ static void shmem_evict_inode(struct inode *inode)
1927 kfree(xattr->name);
1928 kfree(xattr);
1929 }
1930 - BUG_ON(inode->i_blocks);
1931 + WARN_ON(inode->i_blocks);
1932 shmem_free_inode(inode->i_sb);
1933 end_writeback(inode);
1934 }
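
Downgrading the check from BUG_ON() to WARN_ON() turns a block-accounting
mismatch at eviction time from a guaranteed crash into a logged warning,
since the inode can still be torn down safely. A user-space analogue of the
two severities (the i_blocks value here is hypothetical):

    #include <stdio.h>

    /* WARN_ON analogue: report and continue */
    #define WARN_ON(cond) \
            ((cond) ? (fprintf(stderr, "warning: %s\n", #cond), 1) : 0)

    int main(void)
    {
            long i_blocks = 1;      /* leftover accounting, hypothetical */

            WARN_ON(i_blocks);      /* logs, but teardown continues      */
            /* a BUG_ON analogue, e.g. abort(), would stop the process here */
            return 0;
    }
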
1935 diff --git a/mm/vmscan.c b/mm/vmscan.c
1936 index e989ee2..e6ca505 100644
1937 --- a/mm/vmscan.c
1938 +++ b/mm/vmscan.c
1939 @@ -3128,6 +3128,8 @@ static int kswapd(void *p)
1940 &balanced_classzone_idx);
1941 }
1942 }
1943 +
1944 + current->reclaim_state = NULL;
1945 return 0;
1946 }
1947
1948 diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
1949 index 214c2bb..925ca58 100644
1950 --- a/net/ceph/auth_none.c
1951 +++ b/net/ceph/auth_none.c
1952 @@ -59,9 +59,7 @@ static int handle_reply(struct ceph_auth_client *ac, int result,
1953 */
1954 static int ceph_auth_none_create_authorizer(
1955 struct ceph_auth_client *ac, int peer_type,
1956 - struct ceph_authorizer **a,
1957 - void **buf, size_t *len,
1958 - void **reply_buf, size_t *reply_len)
1959 + struct ceph_auth_handshake *auth)
1960 {
1961 struct ceph_auth_none_info *ai = ac->private;
1962 struct ceph_none_authorizer *au = &ai->au;
1963 @@ -82,11 +80,12 @@ static int ceph_auth_none_create_authorizer(
1964 dout("built authorizer len %d\n", au->buf_len);
1965 }
1966
1967 - *a = (struct ceph_authorizer *)au;
1968 - *buf = au->buf;
1969 - *len = au->buf_len;
1970 - *reply_buf = au->reply_buf;
1971 - *reply_len = sizeof(au->reply_buf);
1972 + auth->authorizer = (struct ceph_authorizer *) au;
1973 + auth->authorizer_buf = au->buf;
1974 + auth->authorizer_buf_len = au->buf_len;
1975 + auth->authorizer_reply_buf = au->reply_buf;
1976 + auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
1977 +
1978 return 0;
1979
1980 bad2:
1981 diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
1982 index 1587dc6..a16bf14 100644
1983 --- a/net/ceph/auth_x.c
1984 +++ b/net/ceph/auth_x.c
1985 @@ -526,9 +526,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
1986
1987 static int ceph_x_create_authorizer(
1988 struct ceph_auth_client *ac, int peer_type,
1989 - struct ceph_authorizer **a,
1990 - void **buf, size_t *len,
1991 - void **reply_buf, size_t *reply_len)
1992 + struct ceph_auth_handshake *auth)
1993 {
1994 struct ceph_x_authorizer *au;
1995 struct ceph_x_ticket_handler *th;
1996 @@ -548,11 +546,12 @@ static int ceph_x_create_authorizer(
1997 return ret;
1998 }
1999
2000 - *a = (struct ceph_authorizer *)au;
2001 - *buf = au->buf->vec.iov_base;
2002 - *len = au->buf->vec.iov_len;
2003 - *reply_buf = au->reply_buf;
2004 - *reply_len = sizeof(au->reply_buf);
2005 + auth->authorizer = (struct ceph_authorizer *) au;
2006 + auth->authorizer_buf = au->buf->vec.iov_base;
2007 + auth->authorizer_buf_len = au->buf->vec.iov_len;
2008 + auth->authorizer_reply_buf = au->reply_buf;
2009 + auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
2010 +
2011 return 0;
2012 }
2013
2014 diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
2015 index cc91319..8e74e8c 100644
2016 --- a/net/ceph/ceph_common.c
2017 +++ b/net/ceph/ceph_common.c
2018 @@ -83,7 +83,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
2019 return -1;
2020 }
2021 } else {
2022 - pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
2023 memcpy(&client->fsid, fsid, sizeof(*fsid));
2024 }
2025 return 0;
2026 @@ -468,19 +467,15 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
2027 /* msgr */
2028 if (ceph_test_opt(client, MYIP))
2029 myaddr = &client->options->my_addr;
2030 - client->msgr = ceph_messenger_create(myaddr,
2031 - client->supported_features,
2032 - client->required_features);
2033 - if (IS_ERR(client->msgr)) {
2034 - err = PTR_ERR(client->msgr);
2035 - goto fail;
2036 - }
2037 - client->msgr->nocrc = ceph_test_opt(client, NOCRC);
2038 + ceph_messenger_init(&client->msgr, myaddr,
2039 + client->supported_features,
2040 + client->required_features,
2041 + ceph_test_opt(client, NOCRC));
2042
2043 /* subsystems */
2044 err = ceph_monc_init(&client->monc, client);
2045 if (err < 0)
2046 - goto fail_msgr;
2047 + goto fail;
2048 err = ceph_osdc_init(&client->osdc, client);
2049 if (err < 0)
2050 goto fail_monc;
2051 @@ -489,8 +484,6 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
2052
2053 fail_monc:
2054 ceph_monc_stop(&client->monc);
2055 -fail_msgr:
2056 - ceph_messenger_destroy(client->msgr);
2057 fail:
2058 kfree(client);
2059 return ERR_PTR(err);
2060 @@ -501,22 +494,15 @@ void ceph_destroy_client(struct ceph_client *client)
2061 {
2062 dout("destroy_client %p\n", client);
2063
2064 + atomic_set(&client->msgr.stopping, 1);
2065 +
2066 /* unmount */
2067 ceph_osdc_stop(&client->osdc);
2068
2069 - /*
2070 - * make sure osd connections close out before destroying the
2071 - * auth module, which is needed to free those connections'
2072 - * ceph_authorizers.
2073 - */
2074 - ceph_msgr_flush();
2075 -
2076 ceph_monc_stop(&client->monc);
2077
2078 ceph_debugfs_client_cleanup(client);
2079
2080 - ceph_messenger_destroy(client->msgr);
2081 -
2082 ceph_destroy_options(client->options);
2083
2084 kfree(client);
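
Embedding struct ceph_messenger in struct ceph_client, with an init function
replacing the old create/destroy pair, ties the messenger's lifetime to the
client and removes an allocation that could fail. A sketch of the
pointer-to-embedded refactor (field set trimmed down for illustration):

    #include <stdio.h>
    #include <string.h>

    struct messenger { int stopping; int nocrc; };

    struct client {
            struct messenger msgr;  /* embedded: no separate alloc/free */
    };

    /* cannot fail: it only initializes caller-provided storage */
    static void messenger_init(struct messenger *m, int nocrc)
    {
            memset(m, 0, sizeof(*m));
            m->nocrc = nocrc;
    }

    int main(void)
    {
            struct client c;

            messenger_init(&c.msgr, 1);
            c.msgr.stopping = 1;    /* set at destroy time, as above */
            printf("nocrc=%d\n", c.msgr.nocrc);
            return 0;               /* freeing c frees the messenger too */
    }
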
2085 diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
2086 index d6ebb13..fbda052 100644
2087 --- a/net/ceph/crush/crush.c
2088 +++ b/net/ceph/crush/crush.c
2089 @@ -26,9 +26,9 @@ const char *crush_bucket_alg_name(int alg)
2090 * @b: bucket pointer
2091 * @p: item index in bucket
2092 */
2093 -int crush_get_bucket_item_weight(struct crush_bucket *b, int p)
2094 +int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
2095 {
2096 - if (p >= b->size)
2097 + if ((__u32)p >= b->size)
2098 return 0;
2099
2100 switch (b->alg) {
2101 @@ -37,9 +37,7 @@ int crush_get_bucket_item_weight(struct crush_bucket *b, int p)
2102 case CRUSH_BUCKET_LIST:
2103 return ((struct crush_bucket_list *)b)->item_weights[p];
2104 case CRUSH_BUCKET_TREE:
2105 - if (p & 1)
2106 - return ((struct crush_bucket_tree *)b)->node_weights[p];
2107 - return 0;
2108 + return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
2109 case CRUSH_BUCKET_STRAW:
2110 return ((struct crush_bucket_straw *)b)->item_weights[p];
2111 }
2112 @@ -87,6 +85,8 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b)
2113
2114 void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
2115 {
2116 + kfree(b->h.perm);
2117 + kfree(b->h.items);
2118 kfree(b->node_weights);
2119 kfree(b);
2120 }
2121 @@ -124,10 +124,9 @@ void crush_destroy_bucket(struct crush_bucket *b)
2122 */
2123 void crush_destroy(struct crush_map *map)
2124 {
2125 - int b;
2126 -
2127 /* buckets */
2128 if (map->buckets) {
2129 + __s32 b;
2130 for (b = 0; b < map->max_buckets; b++) {
2131 if (map->buckets[b] == NULL)
2132 continue;
2133 @@ -138,6 +137,7 @@ void crush_destroy(struct crush_map *map)
2134
2135 /* rules */
2136 if (map->rules) {
2137 + __u32 b;
2138 for (b = 0; b < map->max_rules; b++)
2139 kfree(map->rules[b]);
2140 kfree(map->rules);
2141 diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
2142 index b79747c..00baad5 100644
2143 --- a/net/ceph/crush/mapper.c
2144 +++ b/net/ceph/crush/mapper.c
2145 @@ -32,9 +32,9 @@
2146 * @type: storage ruleset type (user defined)
2147 * @size: output set size
2148 */
2149 -int crush_find_rule(struct crush_map *map, int ruleset, int type, int size)
2150 +int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size)
2151 {
2152 - int i;
2153 + __u32 i;
2154
2155 for (i = 0; i < map->max_rules; i++) {
2156 if (map->rules[i] &&
2157 @@ -72,7 +72,7 @@ static int bucket_perm_choose(struct crush_bucket *bucket,
2158 unsigned i, s;
2159
2160 /* start a new permutation if @x has changed */
2161 - if (bucket->perm_x != x || bucket->perm_n == 0) {
2162 + if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) {
2163 dprintk("bucket %d new x=%d\n", bucket->id, x);
2164 bucket->perm_x = x;
2165
2166 @@ -152,8 +152,8 @@ static int bucket_list_choose(struct crush_bucket_list *bucket,
2167 return bucket->h.items[i];
2168 }
2169
2170 - BUG_ON(1);
2171 - return 0;
2172 + dprintk("bad list sums for bucket %d\n", bucket->h.id);
2173 + return bucket->h.items[0];
2174 }
2175
2176
2177 @@ -219,7 +219,7 @@ static int bucket_tree_choose(struct crush_bucket_tree *bucket,
2178 static int bucket_straw_choose(struct crush_bucket_straw *bucket,
2179 int x, int r)
2180 {
2181 - int i;
2182 + __u32 i;
2183 int high = 0;
2184 __u64 high_draw = 0;
2185 __u64 draw;
2186 @@ -239,6 +239,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
2187 static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
2188 {
2189 dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
2190 + BUG_ON(in->size == 0);
2191 switch (in->alg) {
2192 case CRUSH_BUCKET_UNIFORM:
2193 return bucket_uniform_choose((struct crush_bucket_uniform *)in,
2194 @@ -253,7 +254,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
2195 return bucket_straw_choose((struct crush_bucket_straw *)in,
2196 x, r);
2197 default:
2198 - BUG_ON(1);
2199 + dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
2200 return in->items[0];
2201 }
2202 }
2203 @@ -262,7 +263,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
2204 * true if device is marked "out" (failed, fully offloaded)
2205 * of the cluster
2206 */
2207 -static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
2208 +static int is_out(const struct crush_map *map, const __u32 *weight, int item, int x)
2209 {
2210 if (weight[item] >= 0x10000)
2211 return 0;
2212 @@ -287,16 +288,16 @@ static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
2213 * @recurse_to_leaf: true if we want one device under each item of given type
2214 * @out2: second output vector for leaf items (if @recurse_to_leaf)
2215 */
2216 -static int crush_choose(struct crush_map *map,
2217 +static int crush_choose(const struct crush_map *map,
2218 struct crush_bucket *bucket,
2219 - __u32 *weight,
2220 + const __u32 *weight,
2221 int x, int numrep, int type,
2222 int *out, int outpos,
2223 int firstn, int recurse_to_leaf,
2224 int *out2)
2225 {
2226 int rep;
2227 - int ftotal, flocal;
2228 + unsigned int ftotal, flocal;
2229 int retry_descent, retry_bucket, skip_rep;
2230 struct crush_bucket *in = bucket;
2231 int r;
2232 @@ -304,7 +305,7 @@ static int crush_choose(struct crush_map *map,
2233 int item = 0;
2234 int itemtype;
2235 int collide, reject;
2236 - const int orig_tries = 5; /* attempts before we fall back to search */
2237 + const unsigned int orig_tries = 5; /* attempts before we fall back to search */
2238
2239 dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
2240 bucket->id, x, outpos, numrep);
2241 @@ -325,7 +326,7 @@ static int crush_choose(struct crush_map *map,
2242 r = rep;
2243 if (in->alg == CRUSH_BUCKET_UNIFORM) {
2244 /* be careful */
2245 - if (firstn || numrep >= in->size)
2246 + if (firstn || (__u32)numrep >= in->size)
2247 /* r' = r + f_total */
2248 r += ftotal;
2249 else if (in->size % numrep == 0)
2250 @@ -354,7 +355,11 @@ static int crush_choose(struct crush_map *map,
2251 item = bucket_perm_choose(in, x, r);
2252 else
2253 item = crush_bucket_choose(in, x, r);
2254 - BUG_ON(item >= map->max_devices);
2255 + if (item >= map->max_devices) {
2256 + dprintk(" bad item %d\n", item);
2257 + skip_rep = 1;
2258 + break;
2259 + }
2260
2261 /* desired type? */
2262 if (item < 0)
2263 @@ -365,8 +370,12 @@ static int crush_choose(struct crush_map *map,
2264
2265 /* keep going? */
2266 if (itemtype != type) {
2267 - BUG_ON(item >= 0 ||
2268 - (-1-item) >= map->max_buckets);
2269 + if (item >= 0 ||
2270 + (-1-item) >= map->max_buckets) {
2271 + dprintk(" bad item type %d\n", type);
2272 + skip_rep = 1;
2273 + break;
2274 + }
2275 in = map->buckets[-1-item];
2276 retry_bucket = 1;
2277 continue;
2278 @@ -415,7 +424,7 @@ reject:
2279 if (collide && flocal < 3)
2280 /* retry locally a few times */
2281 retry_bucket = 1;
2282 - else if (flocal < in->size + orig_tries)
2283 + else if (flocal <= in->size + orig_tries)
2284 /* exhaustive bucket search */
2285 retry_bucket = 1;
2286 else if (ftotal < 20)
2287 @@ -425,7 +434,7 @@ reject:
2288 /* else give up */
2289 skip_rep = 1;
2290 dprintk(" reject %d collide %d "
2291 - "ftotal %d flocal %d\n",
2292 + "ftotal %u flocal %u\n",
2293 reject, collide, ftotal,
2294 flocal);
2295 }
2296 @@ -456,9 +465,9 @@ reject:
2297 * @result_max: maximum result size
2298 * @force: force initial replica choice; -1 for none
2299 */
2300 -int crush_do_rule(struct crush_map *map,
2301 +int crush_do_rule(const struct crush_map *map,
2302 int ruleno, int x, int *result, int result_max,
2303 - int force, __u32 *weight)
2304 + int force, const __u32 *weight)
2305 {
2306 int result_len;
2307 int force_context[CRUSH_MAX_DEPTH];
2308 @@ -473,12 +482,15 @@ int crush_do_rule(struct crush_map *map,
2309 int osize;
2310 int *tmp;
2311 struct crush_rule *rule;
2312 - int step;
2313 + __u32 step;
2314 int i, j;
2315 int numrep;
2316 int firstn;
2317
2318 - BUG_ON(ruleno >= map->max_rules);
2319 + if ((__u32)ruleno >= map->max_rules) {
2320 + dprintk(" bad ruleno %d\n", ruleno);
2321 + return 0;
2322 + }
2323
2324 rule = map->rules[ruleno];
2325 result_len = 0;
2326 @@ -488,7 +500,8 @@ int crush_do_rule(struct crush_map *map,
2327 /*
2328 * determine hierarchical context of force, if any. note
2329 * that this may or may not correspond to the specific types
2330 - * referenced by the crush rule.
2331 + * referenced by the crush rule. it will also only affect
2332 + * the first descent (TAKE).
2333 */
2334 if (force >= 0 &&
2335 force < map->max_devices &&
2336 @@ -527,7 +540,8 @@ int crush_do_rule(struct crush_map *map,
2337 firstn = 1;
2338 case CRUSH_RULE_CHOOSE_LEAF_INDEP:
2339 case CRUSH_RULE_CHOOSE_INDEP:
2340 - BUG_ON(wsize == 0);
2341 + if (wsize == 0)
2342 + break;
2343
2344 recurse_to_leaf =
2345 rule->steps[step].op ==
2346 @@ -596,7 +610,9 @@ int crush_do_rule(struct crush_map *map,
2347 break;
2348
2349 default:
2350 - BUG_ON(1);
2351 + dprintk(" unknown op %d at step %d\n",
2352 + curstep->op, step);
2353 + break;
2354 }
2355 }
2356 return result_len;
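
Throughout the mapper, hard BUG_ON() assertions about map contents become
bounds checks that log and degrade gracefully (skip the replica, fall back
to item 0, or return an empty result), because a corrupt CRUSH map arrives
over the network and must not crash the client. A sketch of that
validate-and-degrade pattern (simplified types, not the CRUSH structures):

    #include <stdio.h>

    #define dprintk(...) fprintf(stderr, __VA_ARGS__)

    struct map { int max_devices; };

    /* return -1 (skip this replica) instead of crashing on a bad item */
    static int check_item(const struct map *map, int item)
    {
            if (item >= map->max_devices) {
                    dprintk(" bad item %d\n", item);
                    return -1;      /* caller sets skip_rep and breaks */
            }
            return item;
    }

    int main(void)
    {
            struct map m = { .max_devices = 8 };

            printf("%d\n", check_item(&m, 3));   /* 3  */
            printf("%d\n", check_item(&m, 99));  /* -1 */
            return 0;
    }
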
2357 diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
2358 index b780cb7..9da7fdd 100644
2359 --- a/net/ceph/crypto.c
2360 +++ b/net/ceph/crypto.c
2361 @@ -466,6 +466,7 @@ void ceph_key_destroy(struct key *key) {
2362 struct ceph_crypto_key *ckey = key->payload.data;
2363
2364 ceph_crypto_key_destroy(ckey);
2365 + kfree(ckey);
2366 }
2367
2368 struct key_type key_type_ceph = {
2369 diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
2370 index 1919d15..3572dc5 100644
2371 --- a/net/ceph/crypto.h
2372 +++ b/net/ceph/crypto.h
2373 @@ -16,7 +16,8 @@ struct ceph_crypto_key {
2374
2375 static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
2376 {
2377 - kfree(key->key);
2378 + if (key)
2379 + kfree(key->key);
2380 }
2381
2382 extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
2383 diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
2384 index 27d4ea3..680978d 100644
2385 --- a/net/ceph/debugfs.c
2386 +++ b/net/ceph/debugfs.c
2387 @@ -189,6 +189,9 @@ int ceph_debugfs_client_init(struct ceph_client *client)
2388 snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
2389 client->monc.auth->global_id);
2390
2391 + dout("ceph_debugfs_client_init %p %s\n", client, name);
2392 +
2393 + BUG_ON(client->debugfs_dir);
2394 client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
2395 if (!client->debugfs_dir)
2396 goto out;
2397 @@ -234,6 +237,7 @@ out:
2398
2399 void ceph_debugfs_client_cleanup(struct ceph_client *client)
2400 {
2401 + dout("ceph_debugfs_client_cleanup %p\n", client);
2402 debugfs_remove(client->debugfs_osdmap);
2403 debugfs_remove(client->debugfs_monmap);
2404 debugfs_remove(client->osdc.debugfs_file);
2405 diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
2406 index f0993af..aa71a67 100644
2407 --- a/net/ceph/messenger.c
2408 +++ b/net/ceph/messenger.c
2409 @@ -29,6 +29,74 @@
2410 * the sender.
2411 */
2412
2413 +/*
2414 + * We track the state of the socket on a given connection using
2415 + * values defined below. The transition to a new socket state is
2416 + * handled by a function which verifies we aren't coming from an
2417 + * unexpected state.
2418 + *
2419 + * --------
2420 + * | NEW* | transient initial state
2421 + * --------
2422 + * | con_sock_state_init()
2423 + * v
2424 + * ----------
2425 + * | CLOSED | initialized, but no socket (and no
2426 + * ---------- TCP connection)
2427 + * ^ \
2428 + * | \ con_sock_state_connecting()
2429 + * | ----------------------
2430 + * | \
2431 + * + con_sock_state_closed() \
2432 + * |+--------------------------- \
2433 + * | \ \ \
2434 + * | ----------- \ \
2435 + * | | CLOSING | socket event; \ \
2436 + * | ----------- await close \ \
2437 + * | ^ \ |
2438 + * | | \ |
2439 + * | + con_sock_state_closing() \ |
2440 + * | / \ | |
2441 + * | / --------------- | |
2442 + * | / \ v v
2443 + * | / --------------
2444 + * | / -----------------| CONNECTING | socket created, TCP
2445 + * | | / -------------- connect initiated
2446 + * | | | con_sock_state_connected()
2447 + * | | v
2448 + * -------------
2449 + * | CONNECTED | TCP connection established
2450 + * -------------
2451 + *
2452 + * State values for ceph_connection->sock_state; NEW is assumed to be 0.
2453 + */
2454 +
2455 +#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */
2456 +#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */
2457 +#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> CLOSING */
2458 +#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */
2459 +#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */
2460 +
2461 +/*
2462 + * connection states
2463 + */
2464 +#define CON_STATE_CLOSED 1 /* -> PREOPEN */
2465 +#define CON_STATE_PREOPEN 2 /* -> CONNECTING, CLOSED */
2466 +#define CON_STATE_CONNECTING 3 /* -> NEGOTIATING, CLOSED */
2467 +#define CON_STATE_NEGOTIATING 4 /* -> OPEN, CLOSED */
2468 +#define CON_STATE_OPEN 5 /* -> STANDBY, CLOSED */
2469 +#define CON_STATE_STANDBY 6 /* -> PREOPEN, CLOSED */
2470 +
2471 +/*
2472 + * ceph_connection flag bits
2473 + */
2474 +#define CON_FLAG_LOSSYTX 0 /* we can close channel or drop
2475 + * messages on errors */
2476 +#define CON_FLAG_KEEPALIVE_PENDING 1 /* we need to send a keepalive */
2477 +#define CON_FLAG_WRITE_PENDING 2 /* we have data ready to send */
2478 +#define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */
2479 +#define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */
2480 +
2481 /* static tag bytes (protocol control messages) */
2482 static char tag_msg = CEPH_MSGR_TAG_MSG;
2483 static char tag_ack = CEPH_MSGR_TAG_ACK;
2484 @@ -147,72 +215,130 @@ void ceph_msgr_flush(void)
2485 }
2486 EXPORT_SYMBOL(ceph_msgr_flush);
2487
2488 +/* Connection socket state transition functions */
2489 +
2490 +static void con_sock_state_init(struct ceph_connection *con)
2491 +{
2492 + int old_state;
2493 +
2494 + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
2495 + if (WARN_ON(old_state != CON_SOCK_STATE_NEW))
2496 + printk("%s: unexpected old state %d\n", __func__, old_state);
2497 + dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
2498 + CON_SOCK_STATE_CLOSED);
2499 +}
2500 +
2501 +static void con_sock_state_connecting(struct ceph_connection *con)
2502 +{
2503 + int old_state;
2504 +
2505 + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING);
2506 + if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED))
2507 + printk("%s: unexpected old state %d\n", __func__, old_state);
2508 + dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
2509 + CON_SOCK_STATE_CONNECTING);
2510 +}
2511 +
2512 +static void con_sock_state_connected(struct ceph_connection *con)
2513 +{
2514 + int old_state;
2515 +
2516 + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED);
2517 + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING))
2518 + printk("%s: unexpected old state %d\n", __func__, old_state);
2519 + dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
2520 + CON_SOCK_STATE_CONNECTED);
2521 +}
2522 +
2523 +static void con_sock_state_closing(struct ceph_connection *con)
2524 +{
2525 + int old_state;
2526 +
2527 + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING);
2528 + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING &&
2529 + old_state != CON_SOCK_STATE_CONNECTED &&
2530 + old_state != CON_SOCK_STATE_CLOSING))
2531 + printk("%s: unexpected old state %d\n", __func__, old_state);
2532 + dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
2533 + CON_SOCK_STATE_CLOSING);
2534 +}
2535 +
2536 +static void con_sock_state_closed(struct ceph_connection *con)
2537 +{
2538 + int old_state;
2539 +
2540 + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
2541 + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED &&
2542 + old_state != CON_SOCK_STATE_CLOSING &&
2543 + old_state != CON_SOCK_STATE_CONNECTING &&
2544 + old_state != CON_SOCK_STATE_CLOSED))
2545 + printk("%s: unexpected old state %d\n", __func__, old_state);
2546 + dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
2547 + CON_SOCK_STATE_CLOSED);
2548 +}
2549
2550 /*
2551 * socket callback functions
2552 */
2553
2554 /* data available on socket, or listen socket received a connect */
2555 -static void ceph_data_ready(struct sock *sk, int count_unused)
2556 +static void ceph_sock_data_ready(struct sock *sk, int count_unused)
2557 {
2558 struct ceph_connection *con = sk->sk_user_data;
2559 + if (atomic_read(&con->msgr->stopping)) {
2560 + return;
2561 + }
2562
2563 if (sk->sk_state != TCP_CLOSE_WAIT) {
2564 - dout("ceph_data_ready on %p state = %lu, queueing work\n",
2565 + dout("%s on %p state = %lu, queueing work\n", __func__,
2566 con, con->state);
2567 queue_con(con);
2568 }
2569 }
2570
2571 /* socket has buffer space for writing */
2572 -static void ceph_write_space(struct sock *sk)
2573 +static void ceph_sock_write_space(struct sock *sk)
2574 {
2575 struct ceph_connection *con = sk->sk_user_data;
2576
2577 /* only queue to workqueue if there is data we want to write,
2578 * and there is sufficient space in the socket buffer to accept
2579 - * more data. clear SOCK_NOSPACE so that ceph_write_space()
2580 + * more data. clear SOCK_NOSPACE so that ceph_sock_write_space()
2581 * doesn't get called again until try_write() fills the socket
2582 * buffer. See net/ipv4/tcp_input.c:tcp_check_space()
2583 * and net/core/stream.c:sk_stream_write_space().
2584 */
2585 - if (test_bit(WRITE_PENDING, &con->state)) {
2586 + if (test_bit(CON_FLAG_WRITE_PENDING, &con->flags)) {
2587 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
2588 - dout("ceph_write_space %p queueing write work\n", con);
2589 + dout("%s %p queueing write work\n", __func__, con);
2590 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2591 queue_con(con);
2592 }
2593 } else {
2594 - dout("ceph_write_space %p nothing to write\n", con);
2595 + dout("%s %p nothing to write\n", __func__, con);
2596 }
2597 }
2598
2599 /* socket's state has changed */
2600 -static void ceph_state_change(struct sock *sk)
2601 +static void ceph_sock_state_change(struct sock *sk)
2602 {
2603 struct ceph_connection *con = sk->sk_user_data;
2604
2605 - dout("ceph_state_change %p state = %lu sk_state = %u\n",
2606 + dout("%s %p state = %lu sk_state = %u\n", __func__,
2607 con, con->state, sk->sk_state);
2608
2609 - if (test_bit(CLOSED, &con->state))
2610 - return;
2611 -
2612 switch (sk->sk_state) {
2613 case TCP_CLOSE:
2614 - dout("ceph_state_change TCP_CLOSE\n");
2615 + dout("%s TCP_CLOSE\n", __func__);
2616 case TCP_CLOSE_WAIT:
2617 - dout("ceph_state_change TCP_CLOSE_WAIT\n");
2618 - if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) {
2619 - if (test_bit(CONNECTING, &con->state))
2620 - con->error_msg = "connection failed";
2621 - else
2622 - con->error_msg = "socket closed";
2623 - queue_con(con);
2624 - }
2625 + dout("%s TCP_CLOSE_WAIT\n", __func__);
2626 + con_sock_state_closing(con);
2627 + set_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
2628 + queue_con(con);
2629 break;
2630 case TCP_ESTABLISHED:
2631 - dout("ceph_state_change TCP_ESTABLISHED\n");
2632 + dout("%s TCP_ESTABLISHED\n", __func__);
2633 + con_sock_state_connected(con);
2634 queue_con(con);
2635 break;
2636 default: /* Everything else is uninteresting */
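
Each con_sock_state_*() helper above swaps the new socket state in
atomically and warns if the previous state was not a legal predecessor,
making illegal transitions loud without taking any lock. A user-space model
of the same idiom using C11 atomics (state names mirror the patch):

    #include <stdatomic.h>
    #include <stdio.h>

    enum { S_NEW, S_CLOSED, S_CONNECTING, S_CONNECTED, S_CLOSING };

    static atomic_int sock_state = S_NEW;

    static void state_connecting(void)
    {
            int old = atomic_exchange(&sock_state, S_CONNECTING);

            if (old != S_CLOSED)    /* the only legal predecessor */
                    fprintf(stderr, "%s: unexpected old state %d\n",
                            __func__, old);
    }

    int main(void)
    {
            atomic_store(&sock_state, S_CLOSED);  /* con_sock_state_init()    */
            state_connecting();                   /* ok: CLOSED -> CONNECTING  */
            state_connecting();                   /* warns: already CONNECTING */
            return 0;
    }
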
2637 @@ -228,9 +354,9 @@ static void set_sock_callbacks(struct socket *sock,
2638 {
2639 struct sock *sk = sock->sk;
2640 sk->sk_user_data = con;
2641 - sk->sk_data_ready = ceph_data_ready;
2642 - sk->sk_write_space = ceph_write_space;
2643 - sk->sk_state_change = ceph_state_change;
2644 + sk->sk_data_ready = ceph_sock_data_ready;
2645 + sk->sk_write_space = ceph_sock_write_space;
2646 + sk->sk_state_change = ceph_sock_state_change;
2647 }
2648
2649
2650 @@ -262,6 +388,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
2651
2652 dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
2653
2654 + con_sock_state_connecting(con);
2655 ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
2656 O_NONBLOCK);
2657 if (ret == -EINPROGRESS) {
2658 @@ -277,7 +404,6 @@ static int ceph_tcp_connect(struct ceph_connection *con)
2659 return ret;
2660 }
2661 con->sock = sock;
2662 -
2663 return 0;
2664 }
2665
2666 @@ -333,16 +459,24 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
2667 */
2668 static int con_close_socket(struct ceph_connection *con)
2669 {
2670 - int rc;
2671 + int rc = 0;
2672
2673 dout("con_close_socket on %p sock %p\n", con, con->sock);
2674 - if (!con->sock)
2675 - return 0;
2676 - set_bit(SOCK_CLOSED, &con->state);
2677 - rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
2678 - sock_release(con->sock);
2679 - con->sock = NULL;
2680 - clear_bit(SOCK_CLOSED, &con->state);
2681 + if (con->sock) {
2682 + rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
2683 + sock_release(con->sock);
2684 + con->sock = NULL;
2685 + }
2686 +
2687 + /*
2688 + * Forcibly clear the SOCK_CLOSED flag. It gets set
2689 + * independent of the connection mutex, and we could have
2690 + * received a socket close event before we had the chance to
2691 + * shut the socket down.
2692 + */
2693 + clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
2694 +
2695 + con_sock_state_closed(con);
2696 return rc;
2697 }
2698
2699 @@ -353,6 +487,10 @@ static int con_close_socket(struct ceph_connection *con)
2700 static void ceph_msg_remove(struct ceph_msg *msg)
2701 {
2702 list_del_init(&msg->list_head);
2703 + BUG_ON(msg->con == NULL);
2704 + msg->con->ops->put(msg->con);
2705 + msg->con = NULL;
2706 +
2707 ceph_msg_put(msg);
2708 }
2709 static void ceph_msg_remove_list(struct list_head *head)
2710 @@ -372,8 +510,11 @@ static void reset_connection(struct ceph_connection *con)
2711 ceph_msg_remove_list(&con->out_sent);
2712
2713 if (con->in_msg) {
2714 + BUG_ON(con->in_msg->con != con);
2715 + con->in_msg->con = NULL;
2716 ceph_msg_put(con->in_msg);
2717 con->in_msg = NULL;
2718 + con->ops->put(con);
2719 }
2720
2721 con->connect_seq = 0;
2722 @@ -391,32 +532,44 @@ static void reset_connection(struct ceph_connection *con)
2723 */
2724 void ceph_con_close(struct ceph_connection *con)
2725 {
2726 + mutex_lock(&con->mutex);
2727 dout("con_close %p peer %s\n", con,
2728 ceph_pr_addr(&con->peer_addr.in_addr));
2729 - set_bit(CLOSED, &con->state); /* in case there's queued work */
2730 - clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
2731 - clear_bit(LOSSYTX, &con->state); /* so we retry next connect */
2732 - clear_bit(KEEPALIVE_PENDING, &con->state);
2733 - clear_bit(WRITE_PENDING, &con->state);
2734 - mutex_lock(&con->mutex);
2735 + con->state = CON_STATE_CLOSED;
2736 +
2737 + clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */
2738 + clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
2739 + clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
2740 + clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
2741 + clear_bit(CON_FLAG_BACKOFF, &con->flags);
2742 +
2743 reset_connection(con);
2744 con->peer_global_seq = 0;
2745 cancel_delayed_work(&con->work);
2746 + con_close_socket(con);
2747 mutex_unlock(&con->mutex);
2748 - queue_con(con);
2749 }
2750 EXPORT_SYMBOL(ceph_con_close);
2751
2752 /*
2753 * Reopen a closed connection, with a new peer address.
2754 */
2755 -void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
2756 +void ceph_con_open(struct ceph_connection *con,
2757 + __u8 entity_type, __u64 entity_num,
2758 + struct ceph_entity_addr *addr)
2759 {
2760 + mutex_lock(&con->mutex);
2761 dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
2762 - set_bit(OPENING, &con->state);
2763 - clear_bit(CLOSED, &con->state);
2764 +
2765 + BUG_ON(con->state != CON_STATE_CLOSED);
2766 + con->state = CON_STATE_PREOPEN;
2767 +
2768 + con->peer_name.type = (__u8) entity_type;
2769 + con->peer_name.num = cpu_to_le64(entity_num);
2770 +
2771 memcpy(&con->peer_addr, addr, sizeof(*addr));
2772 con->delay = 0; /* reset backoff memory */
2773 + mutex_unlock(&con->mutex);
2774 queue_con(con);
2775 }
2776 EXPORT_SYMBOL(ceph_con_open);
2777 @@ -430,42 +583,26 @@ bool ceph_con_opened(struct ceph_connection *con)
2778 }
2779
2780 /*
2781 - * generic get/put
2782 - */
2783 -struct ceph_connection *ceph_con_get(struct ceph_connection *con)
2784 -{
2785 - int nref = __atomic_add_unless(&con->nref, 1, 0);
2786 -
2787 - dout("con_get %p nref = %d -> %d\n", con, nref, nref + 1);
2788 -
2789 - return nref ? con : NULL;
2790 -}
2791 -
2792 -void ceph_con_put(struct ceph_connection *con)
2793 -{
2794 - int nref = atomic_dec_return(&con->nref);
2795 -
2796 - BUG_ON(nref < 0);
2797 - if (nref == 0) {
2798 - BUG_ON(con->sock);
2799 - kfree(con);
2800 - }
2801 - dout("con_put %p nref = %d -> %d\n", con, nref + 1, nref);
2802 -}
2803 -
2804 -/*
2805 * initialize a new connection.
2806 */
2807 -void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con)
2808 +void ceph_con_init(struct ceph_connection *con, void *private,
2809 + const struct ceph_connection_operations *ops,
2810 + struct ceph_messenger *msgr)
2811 {
2812 dout("con_init %p\n", con);
2813 memset(con, 0, sizeof(*con));
2814 - atomic_set(&con->nref, 1);
2815 + con->private = private;
2816 + con->ops = ops;
2817 con->msgr = msgr;
2818 +
2819 + con_sock_state_init(con);
2820 +
2821 mutex_init(&con->mutex);
2822 INIT_LIST_HEAD(&con->out_queue);
2823 INIT_LIST_HEAD(&con->out_sent);
2824 INIT_DELAYED_WORK(&con->work, con_work);
2825 +
2826 + con->state = CON_STATE_CLOSED;
2827 }
2828 EXPORT_SYMBOL(ceph_con_init);
2829
2830 @@ -486,14 +623,14 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
2831 return ret;
2832 }
2833
2834 -static void ceph_con_out_kvec_reset(struct ceph_connection *con)
2835 +static void con_out_kvec_reset(struct ceph_connection *con)
2836 {
2837 con->out_kvec_left = 0;
2838 con->out_kvec_bytes = 0;
2839 con->out_kvec_cur = &con->out_kvec[0];
2840 }
2841
2842 -static void ceph_con_out_kvec_add(struct ceph_connection *con,
2843 +static void con_out_kvec_add(struct ceph_connection *con,
2844 size_t size, void *data)
2845 {
2846 int index;
2847 @@ -507,6 +644,53 @@ static void ceph_con_out_kvec_add(struct ceph_connection *con,
2848 con->out_kvec_bytes += size;
2849 }
2850
2851 +#ifdef CONFIG_BLOCK
2852 +static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
2853 +{
2854 + if (!bio) {
2855 + *iter = NULL;
2856 + *seg = 0;
2857 + return;
2858 + }
2859 + *iter = bio;
2860 + *seg = bio->bi_idx;
2861 +}
2862 +
2863 +static void iter_bio_next(struct bio **bio_iter, int *seg)
2864 +{
2865 + if (*bio_iter == NULL)
2866 + return;
2867 +
2868 + BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
2869 +
2870 + (*seg)++;
2871 + if (*seg == (*bio_iter)->bi_vcnt)
2872 + init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
2873 +}
2874 +#endif
2875 +
2876 +static void prepare_write_message_data(struct ceph_connection *con)
2877 +{
2878 + struct ceph_msg *msg = con->out_msg;
2879 +
2880 + BUG_ON(!msg);
2881 + BUG_ON(!msg->hdr.data_len);
2882 +
2883 + /* initialize page iterator */
2884 + con->out_msg_pos.page = 0;
2885 + if (msg->pages)
2886 + con->out_msg_pos.page_pos = msg->page_alignment;
2887 + else
2888 + con->out_msg_pos.page_pos = 0;
2889 +#ifdef CONFIG_BLOCK
2890 + if (msg->bio)
2891 + init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
2892 +#endif
2893 + con->out_msg_pos.data_pos = 0;
2894 + con->out_msg_pos.did_page_crc = false;
2895 + con->out_more = 1; /* data + footer will follow */
2896 +}
2897 +
2898 /*
2899 * Prepare footer for currently outgoing message, and finish things
2900 * off. Assumes out_kvec* are already valid.. we just add on to the end.
2901 @@ -516,6 +700,8 @@ static void prepare_write_message_footer(struct ceph_connection *con)
2902 struct ceph_msg *m = con->out_msg;
2903 int v = con->out_kvec_left;
2904
2905 + m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
2906 +
2907 dout("prepare_write_message_footer %p\n", con);
2908 con->out_kvec_is_msg = true;
2909 con->out_kvec[v].iov_base = &m->footer;
2910 @@ -534,7 +720,7 @@ static void prepare_write_message(struct ceph_connection *con)
2911 struct ceph_msg *m;
2912 u32 crc;
2913
2914 - ceph_con_out_kvec_reset(con);
2915 + con_out_kvec_reset(con);
2916 con->out_kvec_is_msg = true;
2917 con->out_msg_done = false;
2918
2919 @@ -542,14 +728,16 @@ static void prepare_write_message(struct ceph_connection *con)
2920 * TCP packet that's a good thing. */
2921 if (con->in_seq > con->in_seq_acked) {
2922 con->in_seq_acked = con->in_seq;
2923 - ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
2924 + con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
2925 con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
2926 - ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
2927 + con_out_kvec_add(con, sizeof (con->out_temp_ack),
2928 &con->out_temp_ack);
2929 }
2930
2931 + BUG_ON(list_empty(&con->out_queue));
2932 m = list_first_entry(&con->out_queue, struct ceph_msg, list_head);
2933 con->out_msg = m;
2934 + BUG_ON(m->con != con);
2935
2936 /* put message on sent list */
2937 ceph_msg_get(m);
2938 @@ -572,18 +760,18 @@ static void prepare_write_message(struct ceph_connection *con)
2939 BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
2940
2941 /* tag + hdr + front + middle */
2942 - ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
2943 - ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
2944 - ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
2945 + con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
2946 + con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
2947 + con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
2948
2949 if (m->middle)
2950 - ceph_con_out_kvec_add(con, m->middle->vec.iov_len,
2951 + con_out_kvec_add(con, m->middle->vec.iov_len,
2952 m->middle->vec.iov_base);
2953
2954 /* fill in crc (except data pages), footer */
2955 crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
2956 con->out_msg->hdr.crc = cpu_to_le32(crc);
2957 - con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE;
2958 + con->out_msg->footer.flags = 0;
2959
2960 crc = crc32c(0, m->front.iov_base, m->front.iov_len);
2961 con->out_msg->footer.front_crc = cpu_to_le32(crc);
2962 @@ -593,28 +781,19 @@ static void prepare_write_message(struct ceph_connection *con)
2963 con->out_msg->footer.middle_crc = cpu_to_le32(crc);
2964 } else
2965 con->out_msg->footer.middle_crc = 0;
2966 - con->out_msg->footer.data_crc = 0;
2967 - dout("prepare_write_message front_crc %u data_crc %u\n",
2968 + dout("%s front_crc %u middle_crc %u\n", __func__,
2969 le32_to_cpu(con->out_msg->footer.front_crc),
2970 le32_to_cpu(con->out_msg->footer.middle_crc));
2971
2972 /* is there a data payload? */
2973 - if (le32_to_cpu(m->hdr.data_len) > 0) {
2974 - /* initialize page iterator */
2975 - con->out_msg_pos.page = 0;
2976 - if (m->pages)
2977 - con->out_msg_pos.page_pos = m->page_alignment;
2978 - else
2979 - con->out_msg_pos.page_pos = 0;
2980 - con->out_msg_pos.data_pos = 0;
2981 - con->out_msg_pos.did_page_crc = false;
2982 - con->out_more = 1; /* data + footer will follow */
2983 - } else {
2984 + con->out_msg->footer.data_crc = 0;
2985 + if (m->hdr.data_len)
2986 + prepare_write_message_data(con);
2987 + else
2988 /* no, queue up footer too and be done */
2989 prepare_write_message_footer(con);
2990 - }
2991
2992 - set_bit(WRITE_PENDING, &con->state);
2993 + set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
2994 }
2995
2996 /*
2997 @@ -626,16 +805,16 @@ static void prepare_write_ack(struct ceph_connection *con)
2998 con->in_seq_acked, con->in_seq);
2999 con->in_seq_acked = con->in_seq;
3000
3001 - ceph_con_out_kvec_reset(con);
3002 + con_out_kvec_reset(con);
3003
3004 - ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
3005 + con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
3006
3007 con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
3008 - ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
3009 + con_out_kvec_add(con, sizeof (con->out_temp_ack),
3010 &con->out_temp_ack);
3011
3012 con->out_more = 1; /* more will follow.. eventually.. */
3013 - set_bit(WRITE_PENDING, &con->state);
3014 + set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3015 }
3016
3017 /*
3018 @@ -644,63 +823,60 @@ static void prepare_write_ack(struct ceph_connection *con)
3019 static void prepare_write_keepalive(struct ceph_connection *con)
3020 {
3021 dout("prepare_write_keepalive %p\n", con);
3022 - ceph_con_out_kvec_reset(con);
3023 - ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
3024 - set_bit(WRITE_PENDING, &con->state);
3025 + con_out_kvec_reset(con);
3026 + con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
3027 + set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3028 }
3029
3030 /*
3031 * Connection negotiation.
3032 */
3033
3034 -static int prepare_connect_authorizer(struct ceph_connection *con)
3035 +static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con,
3036 + int *auth_proto)
3037 {
3038 - void *auth_buf;
3039 - int auth_len = 0;
3040 - int auth_protocol = 0;
3041 + struct ceph_auth_handshake *auth;
3042 +
3043 + if (!con->ops->get_authorizer) {
3044 + con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
3045 + con->out_connect.authorizer_len = 0;
3046 + return NULL;
3047 + }
3048
3049 + /* Can't hold the mutex while getting authorizer */
3050 mutex_unlock(&con->mutex);
3051 - if (con->ops->get_authorizer)
3052 - con->ops->get_authorizer(con, &auth_buf, &auth_len,
3053 - &auth_protocol, &con->auth_reply_buf,
3054 - &con->auth_reply_buf_len,
3055 - con->auth_retry);
3056 + auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
3057 mutex_lock(&con->mutex);
3058
3059 - if (test_bit(CLOSED, &con->state) ||
3060 - test_bit(OPENING, &con->state))
3061 - return -EAGAIN;
3062 + if (IS_ERR(auth))
3063 + return auth;
3064 + if (con->state != CON_STATE_NEGOTIATING)
3065 + return ERR_PTR(-EAGAIN);
3066
3067 - con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol);
3068 - con->out_connect.authorizer_len = cpu_to_le32(auth_len);
3069 -
3070 - if (auth_len)
3071 - ceph_con_out_kvec_add(con, auth_len, auth_buf);
3072 -
3073 - return 0;
3074 + con->auth_reply_buf = auth->authorizer_reply_buf;
3075 + con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
3076 + return auth;
3077 }
3078
3079 /*
3080 * We connected to a peer and are saying hello.
3081 */
3082 -static void prepare_write_banner(struct ceph_messenger *msgr,
3083 - struct ceph_connection *con)
3084 +static void prepare_write_banner(struct ceph_connection *con)
3085 {
3086 - ceph_con_out_kvec_reset(con);
3087 - ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
3088 - ceph_con_out_kvec_add(con, sizeof (msgr->my_enc_addr),
3089 - &msgr->my_enc_addr);
3090 + con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
3091 + con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
3092 + &con->msgr->my_enc_addr);
3093
3094 con->out_more = 0;
3095 - set_bit(WRITE_PENDING, &con->state);
3096 + set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3097 }
3098
3099 -static int prepare_write_connect(struct ceph_messenger *msgr,
3100 - struct ceph_connection *con,
3101 - int include_banner)
3102 +static int prepare_write_connect(struct ceph_connection *con)
3103 {
3104 unsigned global_seq = get_global_seq(con->msgr, 0);
3105 int proto;
3106 + int auth_proto;
3107 + struct ceph_auth_handshake *auth;
3108
3109 switch (con->peer_name.type) {
3110 case CEPH_ENTITY_TYPE_MON:
3111 @@ -719,23 +895,32 @@ static int prepare_write_connect(struct ceph_messenger *msgr,
3112 dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
3113 con->connect_seq, global_seq, proto);
3114
3115 - con->out_connect.features = cpu_to_le64(msgr->supported_features);
3116 + con->out_connect.features = cpu_to_le64(con->msgr->supported_features);
3117 con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
3118 con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
3119 con->out_connect.global_seq = cpu_to_le32(global_seq);
3120 con->out_connect.protocol_version = cpu_to_le32(proto);
3121 con->out_connect.flags = 0;
3122
3123 - if (include_banner)
3124 - prepare_write_banner(msgr, con);
3125 - else
3126 - ceph_con_out_kvec_reset(con);
3127 - ceph_con_out_kvec_add(con, sizeof (con->out_connect), &con->out_connect);
3128 + auth_proto = CEPH_AUTH_UNKNOWN;
3129 + auth = get_connect_authorizer(con, &auth_proto);
3130 + if (IS_ERR(auth))
3131 + return PTR_ERR(auth);
3132 +
3133 + con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
3134 + con->out_connect.authorizer_len = auth ?
3135 + cpu_to_le32(auth->authorizer_buf_len) : 0;
3136 +
3137 + con_out_kvec_add(con, sizeof (con->out_connect),
3138 + &con->out_connect);
3139 + if (auth && auth->authorizer_buf_len)
3140 + con_out_kvec_add(con, auth->authorizer_buf_len,
3141 + auth->authorizer_buf);
3142
3143 con->out_more = 0;
3144 - set_bit(WRITE_PENDING, &con->state);
3145 + set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3146
3147 - return prepare_connect_authorizer(con);
3148 + return 0;
3149 }
3150
3151 /*
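
The hunks above rework the authorizer exchange: get_connect_authorizer() now hands back either NULL (no get_authorizer op), a valid ceph_auth_handshake, or an errno encoded in the pointer itself, so prepare_write_connect() must test IS_ERR() before testing for NULL. A minimal userspace sketch of that three-way return convention follows; the ERR_PTR()/IS_ERR()/PTR_ERR() helpers are reimplemented here in place of <linux/err.h>, and the handshake struct is reduced to a stub.

    #include <errno.h>
    #include <stdio.h>

    /* userspace stand-ins for the kernel's <linux/err.h> helpers */
    #define MAX_ERRNO 4095
    static void *ERR_PTR(long error) { return (void *)error; }
    static long PTR_ERR(const void *ptr) { return (long)ptr; }
    static int IS_ERR(const void *ptr)
    {
            return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    struct handshake { int proto; };    /* stub ceph_auth_handshake */

    /* NULL: nothing to send; valid pointer: use it; ERR_PTR: propagate */
    static struct handshake *get_handshake(int fail)
    {
            static struct handshake hs = { .proto = 2 };
            return fail ? ERR_PTR(-EAGAIN) : &hs;
    }

    int main(void)
    {
            struct handshake *auth = get_handshake(1);

            if (IS_ERR(auth))           /* must be checked first */
                    printf("error %ld\n", PTR_ERR(auth));
            else if (auth)
                    printf("auth proto %d\n", auth->proto);
            return 0;
    }
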
3152 @@ -781,30 +966,34 @@ out:
3153 return ret; /* done! */
3154 }
3155
3156 -#ifdef CONFIG_BLOCK
3157 -static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
3158 +static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
3159 + size_t len, size_t sent, bool in_trail)
3160 {
3161 - if (!bio) {
3162 - *iter = NULL;
3163 - *seg = 0;
3164 - return;
3165 - }
3166 - *iter = bio;
3167 - *seg = bio->bi_idx;
3168 -}
3169 + struct ceph_msg *msg = con->out_msg;
3170
3171 -static void iter_bio_next(struct bio **bio_iter, int *seg)
3172 -{
3173 - if (*bio_iter == NULL)
3174 - return;
3175 + BUG_ON(!msg);
3176 + BUG_ON(!sent);
3177
3178 - BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
3179 + con->out_msg_pos.data_pos += sent;
3180 + con->out_msg_pos.page_pos += sent;
3181 + if (sent < len)
3182 + return;
3183
3184 - (*seg)++;
3185 - if (*seg == (*bio_iter)->bi_vcnt)
3186 - init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
3187 -}
3188 + BUG_ON(sent != len);
3189 + con->out_msg_pos.page_pos = 0;
3190 + con->out_msg_pos.page++;
3191 + con->out_msg_pos.did_page_crc = false;
3192 + if (in_trail)
3193 + list_move_tail(&page->lru,
3194 + &msg->trail->head);
3195 + else if (msg->pagelist)
3196 + list_move_tail(&page->lru,
3197 + &msg->pagelist->head);
3198 +#ifdef CONFIG_BLOCK
3199 + else if (msg->bio)
3200 + iter_bio_next(&msg->bio_iter, &msg->bio_seg);
3201 #endif
3202 +}
3203
3204 /*
3205 * Write as much message data payload as we can. If we finish, queue
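
out_msg_pos_next(), introduced above, centralizes the send-cursor bookkeeping that used to be open-coded after each ceph_tcp_sendpage() call: the cursor always advances by the number of bytes actually sent, but only a full-length send moves on to the next page, resets the per-page offset, and clears the per-page CRC marker. A small sketch of that cursor logic; the struct and helper names here are illustrative, not from the patch.

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096

    struct out_pos {                    /* illustrative cursor */
            size_t data_pos;            /* offset into the whole payload */
            size_t page_pos;            /* offset into the current page */
            int page;                   /* index of the current page */
            bool did_crc;               /* crc folded in for this page? */
    };

    static void pos_next(struct out_pos *pos, size_t len, size_t sent)
    {
            assert(sent > 0);
            pos->data_pos += sent;
            pos->page_pos += sent;
            if (sent < len)
                    return;             /* partial send: same page */

            assert(sent == len);        /* full send: advance the page */
            pos->page_pos = 0;
            pos->page++;
            pos->did_crc = false;
    }

    int main(void)
    {
            struct out_pos pos = { 0, 0, 0, false };

            pos_next(&pos, PAGE_SIZE, 100);                   /* short write */
            pos_next(&pos, PAGE_SIZE - 100, PAGE_SIZE - 100); /* page done */
            printf("page=%d page_pos=%zu data_pos=%zu\n",
                   pos.page, pos.page_pos, pos.data_pos);
            return 0;
    }
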
3206 @@ -821,41 +1010,36 @@ static int write_partial_msg_pages(struct ceph_connection *con)
3207 bool do_datacrc = !con->msgr->nocrc;
3208 int ret;
3209 int total_max_write;
3210 - int in_trail = 0;
3211 - size_t trail_len = (msg->trail ? msg->trail->length : 0);
3212 + bool in_trail = false;
3213 + const size_t trail_len = (msg->trail ? msg->trail->length : 0);
3214 + const size_t trail_off = data_len - trail_len;
3215
3216 dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
3217 - con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages,
3218 + con, msg, con->out_msg_pos.page, msg->nr_pages,
3219 con->out_msg_pos.page_pos);
3220
3221 -#ifdef CONFIG_BLOCK
3222 - if (msg->bio && !msg->bio_iter)
3223 - init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
3224 -#endif
3225 -
3226 + /*
3227 + * Iterate through each page that contains data to be
3228 + * written, and send as much as possible for each.
3229 + *
3230 + * If we are calculating the data crc (the default), we will
3231 + * need to map the page. If we have no pages, they have
3232 + * been revoked, so use the zero page.
3233 + */
3234 while (data_len > con->out_msg_pos.data_pos) {
3235 struct page *page = NULL;
3236 int max_write = PAGE_SIZE;
3237 int bio_offset = 0;
3238
3239 - total_max_write = data_len - trail_len -
3240 - con->out_msg_pos.data_pos;
3241 -
3242 - /*
3243 - * if we are calculating the data crc (the default), we need
3244 - * to map the page. if our pages[] has been revoked, use the
3245 - * zero page.
3246 - */
3247 -
3248 - /* have we reached the trail part of the data? */
3249 - if (con->out_msg_pos.data_pos >= data_len - trail_len) {
3250 - in_trail = 1;
3251 + in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off;
3252 + if (!in_trail)
3253 + total_max_write = trail_off - con->out_msg_pos.data_pos;
3254
3255 + if (in_trail) {
3256 total_max_write = data_len - con->out_msg_pos.data_pos;
3257
3258 page = list_first_entry(&msg->trail->head,
3259 struct page, lru);
3260 - max_write = PAGE_SIZE;
3261 } else if (msg->pages) {
3262 page = msg->pages[con->out_msg_pos.page];
3263 } else if (msg->pagelist) {
3264 @@ -878,52 +1062,32 @@ static int write_partial_msg_pages(struct ceph_connection *con)
3265
3266 if (do_datacrc && !con->out_msg_pos.did_page_crc) {
3267 void *base;
3268 - u32 crc;
3269 - u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc);
3270 + u32 crc = le32_to_cpu(msg->footer.data_crc);
3271 char *kaddr;
3272
3273 kaddr = kmap(page);
3274 BUG_ON(kaddr == NULL);
3275 base = kaddr + con->out_msg_pos.page_pos + bio_offset;
3276 - crc = crc32c(tmpcrc, base, len);
3277 - con->out_msg->footer.data_crc = cpu_to_le32(crc);
3278 + crc = crc32c(crc, base, len);
3279 + kunmap(page);
3280 + msg->footer.data_crc = cpu_to_le32(crc);
3281 con->out_msg_pos.did_page_crc = true;
3282 }
3283 ret = ceph_tcp_sendpage(con->sock, page,
3284 con->out_msg_pos.page_pos + bio_offset,
3285 len, 1);
3286 -
3287 - if (do_datacrc)
3288 - kunmap(page);
3289 -
3290 if (ret <= 0)
3291 goto out;
3292
3293 - con->out_msg_pos.data_pos += ret;
3294 - con->out_msg_pos.page_pos += ret;
3295 - if (ret == len) {
3296 - con->out_msg_pos.page_pos = 0;
3297 - con->out_msg_pos.page++;
3298 - con->out_msg_pos.did_page_crc = false;
3299 - if (in_trail)
3300 - list_move_tail(&page->lru,
3301 - &msg->trail->head);
3302 - else if (msg->pagelist)
3303 - list_move_tail(&page->lru,
3304 - &msg->pagelist->head);
3305 -#ifdef CONFIG_BLOCK
3306 - else if (msg->bio)
3307 - iter_bio_next(&msg->bio_iter, &msg->bio_seg);
3308 -#endif
3309 - }
3310 + out_msg_pos_next(con, page, len, (size_t) ret, in_trail);
3311 }
3312
3313 dout("write_partial_msg_pages %p msg %p done\n", con, msg);
3314
3315 /* prepare and queue up footer, too */
3316 if (!do_datacrc)
3317 - con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
3318 - ceph_con_out_kvec_reset(con);
3319 + msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
3320 + con_out_kvec_reset(con);
3321 prepare_write_message_footer(con);
3322 ret = 1;
3323 out:
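
The checksum hunk above also changes the kmap()/kunmap() pairing: the page is now unmapped immediately after its bytes are folded into the running crc32c, rather than after the send, so a retried partial send (where did_page_crc is already set and no kmap() happened) can no longer hit an unbalanced kunmap(). A toy model of the fold-once-then-unmap pattern, with a stub checksum standing in for crc32c():

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* stub standing in for crc32c(seed, buf, len) */
    static uint32_t crc_step(uint32_t seed, const char *buf, size_t len)
    {
            while (len--)
                    seed = seed * 31 + (unsigned char)*buf++;
            return seed;
    }

    /* fold the page into the running crc at most once, and release the
     * mapping right here, before any (possibly partial) send of the
     * same page */
    static uint32_t checksum_page(const char *page, size_t len,
                                  uint32_t crc, bool *did_page_crc)
    {
            if (!*did_page_crc) {
                    crc = crc_step(crc, page, len);
                    *did_page_crc = true;
                    /* kunmap(page) would go here in the kernel */
            }
            return crc;
    }

    int main(void)
    {
            char page[] = "payload bytes";
            bool done = false;
            uint32_t crc = 0;

            crc = checksum_page(page, sizeof(page), crc, &done);
            crc = checksum_page(page, sizeof(page), crc, &done); /* no-op */
            printf("crc=%08x\n", (unsigned)crc);
            return 0;
    }
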
3324 @@ -992,11 +1156,10 @@ static int prepare_read_message(struct ceph_connection *con)
3325
3326
3327 static int read_partial(struct ceph_connection *con,
3328 - int *to, int size, void *object)
3329 + int end, int size, void *object)
3330 {
3331 - *to += size;
3332 - while (con->in_base_pos < *to) {
3333 - int left = *to - con->in_base_pos;
3334 + while (con->in_base_pos < end) {
3335 + int left = end - con->in_base_pos;
3336 int have = size - left;
3337 int ret = ceph_tcp_recvmsg(con->sock, object + have, left);
3338 if (ret <= 0)
3339 @@ -1012,37 +1175,52 @@ static int read_partial(struct ceph_connection *con,
3340 */
3341 static int read_partial_banner(struct ceph_connection *con)
3342 {
3343 - int ret, to = 0;
3344 + int size;
3345 + int end;
3346 + int ret;
3347
3348 dout("read_partial_banner %p at %d\n", con, con->in_base_pos);
3349
3350 /* peer's banner */
3351 - ret = read_partial(con, &to, strlen(CEPH_BANNER), con->in_banner);
3352 + size = strlen(CEPH_BANNER);
3353 + end = size;
3354 + ret = read_partial(con, end, size, con->in_banner);
3355 if (ret <= 0)
3356 goto out;
3357 - ret = read_partial(con, &to, sizeof(con->actual_peer_addr),
3358 - &con->actual_peer_addr);
3359 +
3360 + size = sizeof (con->actual_peer_addr);
3361 + end += size;
3362 + ret = read_partial(con, end, size, &con->actual_peer_addr);
3363 if (ret <= 0)
3364 goto out;
3365 - ret = read_partial(con, &to, sizeof(con->peer_addr_for_me),
3366 - &con->peer_addr_for_me);
3367 +
3368 + size = sizeof (con->peer_addr_for_me);
3369 + end += size;
3370 + ret = read_partial(con, end, size, &con->peer_addr_for_me);
3371 if (ret <= 0)
3372 goto out;
3373 +
3374 out:
3375 return ret;
3376 }
3377
3378 static int read_partial_connect(struct ceph_connection *con)
3379 {
3380 - int ret, to = 0;
3381 + int size;
3382 + int end;
3383 + int ret;
3384
3385 dout("read_partial_connect %p at %d\n", con, con->in_base_pos);
3386
3387 - ret = read_partial(con, &to, sizeof(con->in_reply), &con->in_reply);
3388 + size = sizeof (con->in_reply);
3389 + end = size;
3390 + ret = read_partial(con, end, size, &con->in_reply);
3391 if (ret <= 0)
3392 goto out;
3393 - ret = read_partial(con, &to, le32_to_cpu(con->in_reply.authorizer_len),
3394 - con->auth_reply_buf);
3395 +
3396 + size = le32_to_cpu(con->in_reply.authorizer_len);
3397 + end += size;
3398 + ret = read_partial(con, end, size, con->auth_reply_buf);
3399 if (ret <= 0)
3400 goto out;
3401
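
The read_partial() rework above replaces the cumulative in/out counter (*to, which the callee used to advance) with two plain values: size, the length of the object being read, and end, the absolute position in the input stream where that object finishes. Callers chain reads simply by adding each size to end, as in read_partial_banner() and read_partial_connect(). A self-contained userspace model, with a memcpy-fed stub in place of ceph_tcp_recvmsg():

    #include <stdio.h>
    #include <string.h>

    struct conn {                       /* stub connection */
            const char *wire;           /* bytes "on the socket" */
            int wire_len;
            int in_base_pos;            /* how much we have consumed */
    };

    /* same shape as the reworked read_partial(): 'end' is the absolute
     * offset where this object finishes, 'size' its length */
    static int read_partial(struct conn *c, int end, int size, void *obj)
    {
            while (c->in_base_pos < end) {
                    int left = end - c->in_base_pos;
                    int have = size - left;
                    int avail = c->wire_len - c->in_base_pos;
                    int n = avail < left ? avail : left;

                    if (n <= 0)
                            return 0;   /* would block; try again later */
                    memcpy((char *)obj + have, c->wire + c->in_base_pos, n);
                    c->in_base_pos += n;
            }
            return 1;
    }

    int main(void)
    {
            struct conn c = { "BANNERaddr", 10, 0 };
            char banner[6], addr[4];
            int size, end, ret;

            size = sizeof(banner);      /* first object starts the chain */
            end = size;
            ret = read_partial(&c, end, size, banner);

            size = sizeof(addr);        /* each later object just appends */
            end += size;
            ret = read_partial(&c, end, size, addr);

            printf("%.6s %.4s ret=%d\n", banner, addr, ret);
            return 0;
    }
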
3402 @@ -1321,20 +1499,14 @@ static int process_banner(struct ceph_connection *con)
3403 ceph_pr_addr(&con->msgr->inst.addr.in_addr));
3404 }
3405
3406 - set_bit(NEGOTIATING, &con->state);
3407 - prepare_read_connect(con);
3408 return 0;
3409 }
3410
3411 static void fail_protocol(struct ceph_connection *con)
3412 {
3413 reset_connection(con);
3414 - set_bit(CLOSED, &con->state); /* in case there's queued work */
3415 -
3416 - mutex_unlock(&con->mutex);
3417 - if (con->ops->bad_proto)
3418 - con->ops->bad_proto(con);
3419 - mutex_lock(&con->mutex);
3420 + BUG_ON(con->state != CON_STATE_NEGOTIATING);
3421 + con->state = CON_STATE_CLOSED;
3422 }
3423
3424 static int process_connect(struct ceph_connection *con)
3425 @@ -1377,7 +1549,8 @@ static int process_connect(struct ceph_connection *con)
3426 return -1;
3427 }
3428 con->auth_retry = 1;
3429 - ret = prepare_write_connect(con->msgr, con, 0);
3430 + con_out_kvec_reset(con);
3431 + ret = prepare_write_connect(con);
3432 if (ret < 0)
3433 return ret;
3434 prepare_read_connect(con);
3435 @@ -1392,12 +1565,15 @@ static int process_connect(struct ceph_connection *con)
3436 * dropped messages.
3437 */
3438 dout("process_connect got RESET peer seq %u\n",
3439 - le32_to_cpu(con->in_connect.connect_seq));
3440 + le32_to_cpu(con->in_reply.connect_seq));
3441 pr_err("%s%lld %s connection reset\n",
3442 ENTITY_NAME(con->peer_name),
3443 ceph_pr_addr(&con->peer_addr.in_addr));
3444 reset_connection(con);
3445 - prepare_write_connect(con->msgr, con, 0);
3446 + con_out_kvec_reset(con);
3447 + ret = prepare_write_connect(con);
3448 + if (ret < 0)
3449 + return ret;
3450 prepare_read_connect(con);
3451
3452 /* Tell ceph about it. */
3453 @@ -1406,8 +1582,7 @@ static int process_connect(struct ceph_connection *con)
3454 if (con->ops->peer_reset)
3455 con->ops->peer_reset(con);
3456 mutex_lock(&con->mutex);
3457 - if (test_bit(CLOSED, &con->state) ||
3458 - test_bit(OPENING, &con->state))
3459 + if (con->state != CON_STATE_NEGOTIATING)
3460 return -EAGAIN;
3461 break;
3462
3463 @@ -1416,11 +1591,14 @@ static int process_connect(struct ceph_connection *con)
3464 * If we sent a smaller connect_seq than the peer has, try
3465 * again with a larger value.
3466 */
3467 - dout("process_connect got RETRY my seq = %u, peer_seq = %u\n",
3468 + dout("process_connect got RETRY_SESSION my seq %u, peer %u\n",
3469 le32_to_cpu(con->out_connect.connect_seq),
3470 - le32_to_cpu(con->in_connect.connect_seq));
3471 - con->connect_seq = le32_to_cpu(con->in_connect.connect_seq);
3472 - prepare_write_connect(con->msgr, con, 0);
3473 + le32_to_cpu(con->in_reply.connect_seq));
3474 + con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
3475 + con_out_kvec_reset(con);
3476 + ret = prepare_write_connect(con);
3477 + if (ret < 0)
3478 + return ret;
3479 prepare_read_connect(con);
3480 break;
3481
3482 @@ -1431,10 +1609,13 @@ static int process_connect(struct ceph_connection *con)
3483 */
3484 dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",
3485 con->peer_global_seq,
3486 - le32_to_cpu(con->in_connect.global_seq));
3487 + le32_to_cpu(con->in_reply.global_seq));
3488 get_global_seq(con->msgr,
3489 - le32_to_cpu(con->in_connect.global_seq));
3490 - prepare_write_connect(con->msgr, con, 0);
3491 + le32_to_cpu(con->in_reply.global_seq));
3492 + con_out_kvec_reset(con);
3493 + ret = prepare_write_connect(con);
3494 + if (ret < 0)
3495 + return ret;
3496 prepare_read_connect(con);
3497 break;
3498
3499 @@ -1449,7 +1630,10 @@ static int process_connect(struct ceph_connection *con)
3500 fail_protocol(con);
3501 return -1;
3502 }
3503 - clear_bit(CONNECTING, &con->state);
3504 +
3505 + BUG_ON(con->state != CON_STATE_NEGOTIATING);
3506 + con->state = CON_STATE_OPEN;
3507 +
3508 con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
3509 con->connect_seq++;
3510 con->peer_features = server_feat;
3511 @@ -1461,7 +1645,9 @@ static int process_connect(struct ceph_connection *con)
3512 le32_to_cpu(con->in_reply.connect_seq));
3513
3514 if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
3515 - set_bit(LOSSYTX, &con->state);
3516 + set_bit(CON_FLAG_LOSSYTX, &con->flags);
3517 +
3518 + con->delay = 0; /* reset backoff memory */
3519
3520 prepare_read_tag(con);
3521 break;
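
Running through all of these process_connect() hunks is the series' central cleanup: the old overlapping test_bit() state bits are split into one exclusive con->state value (CON_STATE_*) and an independent con->flags bitmask (CON_FLAG_*), and a successful handshake now zeroes con->delay so backoff does not persist across reconnects. A sketch of that split; the names are taken from the hunks, but the enum order and flag values here are illustrative.

    #include <stdio.h>

    enum con_state {
            CON_STATE_CLOSED,
            CON_STATE_PREOPEN,
            CON_STATE_CONNECTING,
            CON_STATE_NEGOTIATING,
            CON_STATE_OPEN,
            CON_STATE_STANDBY,
    };

    enum {
            CON_FLAG_LOSSYTX           = 1 << 0,
            CON_FLAG_KEEPALIVE_PENDING = 1 << 1,
            CON_FLAG_WRITE_PENDING     = 1 << 2,
    };

    struct con {
            enum con_state state;       /* exactly one at a time */
            unsigned long flags;        /* independent bits */
            unsigned long delay;        /* reconnect backoff */
    };

    int main(void)
    {
            struct con c = { CON_STATE_NEGOTIATING, 0, 8 };

            /* handshake succeeded: one state replaces another ... */
            c.state = CON_STATE_OPEN;
            c.delay = 0;                /* ... and backoff memory resets */
            /* ... while flags toggle independently of the state */
            c.flags |= CON_FLAG_WRITE_PENDING;

            printf("state=%d flags=%#lx delay=%lu\n",
                   c.state, c.flags, c.delay);
            return 0;
    }
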
3522 @@ -1491,10 +1677,10 @@ static int process_connect(struct ceph_connection *con)
3523 */
3524 static int read_partial_ack(struct ceph_connection *con)
3525 {
3526 - int to = 0;
3527 + int size = sizeof (con->in_temp_ack);
3528 + int end = size;
3529
3530 - return read_partial(con, &to, sizeof(con->in_temp_ack),
3531 - &con->in_temp_ack);
3532 + return read_partial(con, end, size, &con->in_temp_ack);
3533 }
3534
3535
3536 @@ -1547,10 +1733,7 @@ static int read_partial_message_section(struct ceph_connection *con,
3537 return 1;
3538 }
3539
3540 -static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
3541 - struct ceph_msg_header *hdr,
3542 - int *skip);
3543 -
3544 +static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
3545
3546 static int read_partial_message_pages(struct ceph_connection *con,
3547 struct page **pages,
3548 @@ -1593,9 +1776,6 @@ static int read_partial_message_bio(struct ceph_connection *con,
3549 void *p;
3550 int ret, left;
3551
3552 - if (IS_ERR(bv))
3553 - return PTR_ERR(bv);
3554 -
3555 left = min((int)(data_len - con->in_msg_pos.data_pos),
3556 (int)(bv->bv_len - con->in_msg_pos.page_pos));
3557
3558 @@ -1627,26 +1807,22 @@ static int read_partial_message_bio(struct ceph_connection *con,
3559 static int read_partial_message(struct ceph_connection *con)
3560 {
3561 struct ceph_msg *m = con->in_msg;
3562 + int size;
3563 + int end;
3564 int ret;
3565 - int to, left;
3566 unsigned front_len, middle_len, data_len;
3567 bool do_datacrc = !con->msgr->nocrc;
3568 - int skip;
3569 u64 seq;
3570 u32 crc;
3571
3572 dout("read_partial_message con %p msg %p\n", con, m);
3573
3574 /* header */
3575 - while (con->in_base_pos < sizeof(con->in_hdr)) {
3576 - left = sizeof(con->in_hdr) - con->in_base_pos;
3577 - ret = ceph_tcp_recvmsg(con->sock,
3578 - (char *)&con->in_hdr + con->in_base_pos,
3579 - left);
3580 - if (ret <= 0)
3581 - return ret;
3582 - con->in_base_pos += ret;
3583 - }
3584 + size = sizeof (con->in_hdr);
3585 + end = size;
3586 + ret = read_partial(con, end, size, &con->in_hdr);
3587 + if (ret <= 0)
3588 + return ret;
3589
3590 crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc));
3591 if (cpu_to_le32(crc) != con->in_hdr.crc) {
3592 @@ -1686,10 +1862,13 @@ static int read_partial_message(struct ceph_connection *con)
3593
3594 /* allocate message? */
3595 if (!con->in_msg) {
3596 + int skip = 0;
3597 +
3598 dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
3599 con->in_hdr.front_len, con->in_hdr.data_len);
3600 - skip = 0;
3601 - con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
3602 + ret = ceph_con_in_msg_alloc(con, &skip);
3603 + if (ret < 0)
3604 + return ret;
3605 if (skip) {
3606 /* skip this message */
3607 dout("alloc_msg said skip message\n");
3608 @@ -1700,11 +1879,9 @@ static int read_partial_message(struct ceph_connection *con)
3609 con->in_seq++;
3610 return 0;
3611 }
3612 - if (!con->in_msg) {
3613 - con->error_msg =
3614 - "error allocating memory for incoming message";
3615 - return -ENOMEM;
3616 - }
3617 +
3618 + BUG_ON(!con->in_msg);
3619 + BUG_ON(con->in_msg->con != con);
3620 m = con->in_msg;
3621 m->front.iov_len = 0; /* haven't read it yet */
3622 if (m->middle)
3623 @@ -1716,6 +1893,11 @@ static int read_partial_message(struct ceph_connection *con)
3624 else
3625 con->in_msg_pos.page_pos = 0;
3626 con->in_msg_pos.data_pos = 0;
3627 +
3628 +#ifdef CONFIG_BLOCK
3629 + if (m->bio)
3630 + init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
3631 +#endif
3632 }
3633
3634 /* front */
3635 @@ -1732,10 +1914,6 @@ static int read_partial_message(struct ceph_connection *con)
3636 if (ret <= 0)
3637 return ret;
3638 }
3639 -#ifdef CONFIG_BLOCK
3640 - if (m->bio && !m->bio_iter)
3641 - init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
3642 -#endif
3643
3644 /* (page) data */
3645 while (con->in_msg_pos.data_pos < data_len) {
3646 @@ -1746,7 +1924,7 @@ static int read_partial_message(struct ceph_connection *con)
3647 return ret;
3648 #ifdef CONFIG_BLOCK
3649 } else if (m->bio) {
3650 -
3651 + BUG_ON(!m->bio_iter);
3652 ret = read_partial_message_bio(con,
3653 &m->bio_iter, &m->bio_seg,
3654 data_len, do_datacrc);
3655 @@ -1759,16 +1937,12 @@ static int read_partial_message(struct ceph_connection *con)
3656 }
3657
3658 /* footer */
3659 - to = sizeof(m->hdr) + sizeof(m->footer);
3660 - while (con->in_base_pos < to) {
3661 - left = to - con->in_base_pos;
3662 - ret = ceph_tcp_recvmsg(con->sock, (char *)&m->footer +
3663 - (con->in_base_pos - sizeof(m->hdr)),
3664 - left);
3665 - if (ret <= 0)
3666 - return ret;
3667 - con->in_base_pos += ret;
3668 - }
3669 + size = sizeof (m->footer);
3670 + end += size;
3671 + ret = read_partial(con, end, size, &m->footer);
3672 + if (ret <= 0)
3673 + return ret;
3674 +
3675 dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n",
3676 m, front_len, m->footer.front_crc, middle_len,
3677 m->footer.middle_crc, data_len, m->footer.data_crc);
3678 @@ -1804,8 +1978,11 @@ static void process_message(struct ceph_connection *con)
3679 {
3680 struct ceph_msg *msg;
3681
3682 + BUG_ON(con->in_msg->con != con);
3683 + con->in_msg->con = NULL;
3684 msg = con->in_msg;
3685 con->in_msg = NULL;
3686 + con->ops->put(con);
3687
3688 /* if first message, set peer_name */
3689 if (con->peer_name.type == 0)
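
process_message() above reflects the new ownership rule of this series: every message pins its connection through msg->con (taken with con->ops->get()), and the link is severed and the reference dropped before the message is handed to the dispatcher. A toy refcount model of that handoff:

    #include <assert.h>
    #include <stdio.h>

    struct con { int ref; };            /* stub connection */
    struct msg { struct con *con; };    /* stub message */

    static struct con *con_get(struct con *c) { c->ref++; return c; }
    static void con_put(struct con *c) { c->ref--; }

    static void receive(struct con *c, struct msg *m)
    {
            m->con = con_get(c);        /* the message pins its connection */
    }

    static void dispatch(struct con *c, struct msg *m)
    {
            assert(m->con == c);
            m->con = NULL;              /* sever the link first ... */
            con_put(c);                 /* ... then drop the pin ... */
            /* ... and only now hand m to the protocol handler */
    }

    int main(void)
    {
            struct con c = { 1 };
            struct msg m = { NULL };

            receive(&c, &m);
            dispatch(&c, &m);
            printf("refcount back to %d\n", c.ref);
            return 0;
    }
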
3690 @@ -1825,7 +2002,6 @@ static void process_message(struct ceph_connection *con)
3691 con->ops->dispatch(con, msg);
3692
3693 mutex_lock(&con->mutex);
3694 - prepare_read_tag(con);
3695 }
3696
3697
3698 @@ -1835,21 +2011,21 @@ static void process_message(struct ceph_connection *con)
3699 */
3700 static int try_write(struct ceph_connection *con)
3701 {
3702 - struct ceph_messenger *msgr = con->msgr;
3703 int ret = 1;
3704
3705 - dout("try_write start %p state %lu nref %d\n", con, con->state,
3706 - atomic_read(&con->nref));
3707 + dout("try_write start %p state %lu\n", con, con->state);
3708
3709 more:
3710 dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
3711
3712 /* open the socket first? */
3713 - if (con->sock == NULL) {
3714 - prepare_write_connect(msgr, con, 1);
3715 + if (con->state == CON_STATE_PREOPEN) {
3716 + BUG_ON(con->sock);
3717 + con->state = CON_STATE_CONNECTING;
3718 +
3719 + con_out_kvec_reset(con);
3720 + prepare_write_banner(con);
3721 prepare_read_banner(con);
3722 - set_bit(CONNECTING, &con->state);
3723 - clear_bit(NEGOTIATING, &con->state);
3724
3725 BUG_ON(con->in_msg);
3726 con->in_tag = CEPH_MSGR_TAG_READY;
3727 @@ -1896,7 +2072,7 @@ more_kvec:
3728 }
3729
3730 do_next:
3731 - if (!test_bit(CONNECTING, &con->state)) {
3732 + if (con->state == CON_STATE_OPEN) {
3733 /* is anything else pending? */
3734 if (!list_empty(&con->out_queue)) {
3735 prepare_write_message(con);
3736 @@ -1906,14 +2082,15 @@ do_next:
3737 prepare_write_ack(con);
3738 goto more;
3739 }
3740 - if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) {
3741 + if (test_and_clear_bit(CON_FLAG_KEEPALIVE_PENDING,
3742 + &con->flags)) {
3743 prepare_write_keepalive(con);
3744 goto more;
3745 }
3746 }
3747
3748 /* Nothing to do! */
3749 - clear_bit(WRITE_PENDING, &con->state);
3750 + clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
3751 dout("try_write nothing else to write.\n");
3752 ret = 0;
3753 out:
3754 @@ -1930,38 +2107,46 @@ static int try_read(struct ceph_connection *con)
3755 {
3756 int ret = -1;
3757
3758 - if (!con->sock)
3759 - return 0;
3760 -
3761 - if (test_bit(STANDBY, &con->state))
3762 +more:
3763 + dout("try_read start on %p state %lu\n", con, con->state);
3764 + if (con->state != CON_STATE_CONNECTING &&
3765 + con->state != CON_STATE_NEGOTIATING &&
3766 + con->state != CON_STATE_OPEN)
3767 return 0;
3768
3769 - dout("try_read start on %p\n", con);
3770 + BUG_ON(!con->sock);
3771
3772 -more:
3773 dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
3774 con->in_base_pos);
3775
3776 - /*
3777 - * process_connect and process_message drop and re-take
3778 - * con->mutex. make sure we handle a racing close or reopen.
3779 - */
3780 - if (test_bit(CLOSED, &con->state) ||
3781 - test_bit(OPENING, &con->state)) {
3782 - ret = -EAGAIN;
3783 + if (con->state == CON_STATE_CONNECTING) {
3784 + dout("try_read connecting\n");
3785 + ret = read_partial_banner(con);
3786 + if (ret <= 0)
3787 + goto out;
3788 + ret = process_banner(con);
3789 + if (ret < 0)
3790 + goto out;
3791 +
3792 + BUG_ON(con->state != CON_STATE_CONNECTING);
3793 + con->state = CON_STATE_NEGOTIATING;
3794 +
3795 + /*
3796 + * Received banner is good, exchange connection info.
3797 + * Do not reset out_kvec, as sending our banner raced
3798 + * with receiving peer banner after connect completed.
3799 + */
3800 + ret = prepare_write_connect(con);
3801 + if (ret < 0)
3802 + goto out;
3803 + prepare_read_connect(con);
3804 +
3805 + /* Send connection info before awaiting response */
3806 goto out;
3807 }
3808
3809 - if (test_bit(CONNECTING, &con->state)) {
3810 - if (!test_bit(NEGOTIATING, &con->state)) {
3811 - dout("try_read connecting\n");
3812 - ret = read_partial_banner(con);
3813 - if (ret <= 0)
3814 - goto out;
3815 - ret = process_banner(con);
3816 - if (ret < 0)
3817 - goto out;
3818 - }
3819 + if (con->state == CON_STATE_NEGOTIATING) {
3820 + dout("try_read negotiating\n");
3821 ret = read_partial_connect(con);
3822 if (ret <= 0)
3823 goto out;
3824 @@ -1971,6 +2156,8 @@ more:
3825 goto more;
3826 }
3827
3828 + BUG_ON(con->state != CON_STATE_OPEN);
3829 +
3830 if (con->in_base_pos < 0) {
3831 /*
3832 * skipping + discarding content.
3833 @@ -2004,7 +2191,8 @@ more:
3834 prepare_read_ack(con);
3835 break;
3836 case CEPH_MSGR_TAG_CLOSE:
3837 - set_bit(CLOSED, &con->state); /* fixme */
3838 + con_close_socket(con);
3839 + con->state = CON_STATE_CLOSED;
3840 goto out;
3841 default:
3842 goto bad_tag;
3843 @@ -2027,6 +2215,8 @@ more:
3844 if (con->in_tag == CEPH_MSGR_TAG_READY)
3845 goto more;
3846 process_message(con);
3847 + if (con->state == CON_STATE_OPEN)
3848 + prepare_read_tag(con);
3849 goto more;
3850 }
3851 if (con->in_tag == CEPH_MSGR_TAG_ACK) {
3852 @@ -2055,12 +2245,6 @@ bad_tag:
3853 */
3854 static void queue_con(struct ceph_connection *con)
3855 {
3856 - if (test_bit(DEAD, &con->state)) {
3857 - dout("queue_con %p ignoring: DEAD\n",
3858 - con);
3859 - return;
3860 - }
3861 -
3862 if (!con->ops->get(con)) {
3863 dout("queue_con %p ref count 0\n", con);
3864 return;
3865 @@ -2085,7 +2269,26 @@ static void con_work(struct work_struct *work)
3866
3867 mutex_lock(&con->mutex);
3868 restart:
3869 - if (test_and_clear_bit(BACKOFF, &con->state)) {
3870 + if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) {
3871 + switch (con->state) {
3872 + case CON_STATE_CONNECTING:
3873 + con->error_msg = "connection failed";
3874 + break;
3875 + case CON_STATE_NEGOTIATING:
3876 + con->error_msg = "negotiation failed";
3877 + break;
3878 + case CON_STATE_OPEN:
3879 + con->error_msg = "socket closed";
3880 + break;
3881 + default:
3882 + dout("unrecognized con state %d\n", (int)con->state);
3883 + con->error_msg = "unrecognized con state";
3884 + BUG();
3885 + }
3886 + goto fault;
3887 + }
3888 +
3889 + if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) {
3890 dout("con_work %p backing off\n", con);
3891 if (queue_delayed_work(ceph_msgr_wq, &con->work,
3892 round_jiffies_relative(con->delay))) {
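
With a single exclusive state value, the socket-closed fault path above can derive its error text from con->state directly instead of probing several bits. Sketched as a plain lookup; the strings are copied from the hunk, the enum is abbreviated.

    #include <stdio.h>

    enum con_state { CONNECTING, NEGOTIATING, OPEN };   /* abbreviated */

    static const char *closed_error(enum con_state s)
    {
            switch (s) {
            case CONNECTING:
                    return "connection failed";
            case NEGOTIATING:
                    return "negotiation failed";
            case OPEN:
                    return "socket closed";
            }
            return "unrecognized con state";
    }

    int main(void)
    {
            printf("%s\n", closed_error(NEGOTIATING));
            return 0;
    }
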
3893 @@ -2093,41 +2296,42 @@ restart:
3894 mutex_unlock(&con->mutex);
3895 return;
3896 } else {
3897 - con->ops->put(con);
3898 dout("con_work %p FAILED to back off %lu\n", con,
3899 con->delay);
3900 + set_bit(CON_FLAG_BACKOFF, &con->flags);
3901 }
3902 + goto done;
3903 }
3904
3905 - if (test_bit(STANDBY, &con->state)) {
3906 + if (con->state == CON_STATE_STANDBY) {
3907 dout("con_work %p STANDBY\n", con);
3908 goto done;
3909 }
3910 - if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
3911 - dout("con_work CLOSED\n");
3912 - con_close_socket(con);
3913 + if (con->state == CON_STATE_CLOSED) {
3914 + dout("con_work %p CLOSED\n", con);
3915 + BUG_ON(con->sock);
3916 goto done;
3917 }
3918 - if (test_and_clear_bit(OPENING, &con->state)) {
3919 - /* reopen w/ new peer */
3920 + if (con->state == CON_STATE_PREOPEN) {
3921 dout("con_work OPENING\n");
3922 - con_close_socket(con);
3923 + BUG_ON(con->sock);
3924 }
3925
3926 - if (test_and_clear_bit(SOCK_CLOSED, &con->state))
3927 - goto fault;
3928 -
3929 ret = try_read(con);
3930 if (ret == -EAGAIN)
3931 goto restart;
3932 - if (ret < 0)
3933 + if (ret < 0) {
3934 + con->error_msg = "socket error on read";
3935 goto fault;
3936 + }
3937
3938 ret = try_write(con);
3939 if (ret == -EAGAIN)
3940 goto restart;
3941 - if (ret < 0)
3942 + if (ret < 0) {
3943 + con->error_msg = "socket error on write";
3944 goto fault;
3945 + }
3946
3947 done:
3948 mutex_unlock(&con->mutex);
3949 @@ -2136,7 +2340,6 @@ done_unlocked:
3950 return;
3951
3952 fault:
3953 - mutex_unlock(&con->mutex);
3954 ceph_fault(con); /* error/fault path */
3955 goto done_unlocked;
3956 }
3957 @@ -2147,26 +2350,31 @@ fault:
3958 * exponential backoff
3959 */
3960 static void ceph_fault(struct ceph_connection *con)
3961 + __releases(con->mutex)
3962 {
3963 pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
3964 ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
3965 dout("fault %p state %lu to peer %s\n",
3966 con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
3967
3968 - if (test_bit(LOSSYTX, &con->state)) {
3969 - dout("fault on LOSSYTX channel\n");
3970 - goto out;
3971 - }
3972 -
3973 - mutex_lock(&con->mutex);
3974 - if (test_bit(CLOSED, &con->state))
3975 - goto out_unlock;
3976 + BUG_ON(con->state != CON_STATE_CONNECTING &&
3977 + con->state != CON_STATE_NEGOTIATING &&
3978 + con->state != CON_STATE_OPEN);
3979
3980 con_close_socket(con);
3981
3982 + if (test_bit(CON_FLAG_LOSSYTX, &con->flags)) {
3983 + dout("fault on LOSSYTX channel, marking CLOSED\n");
3984 + con->state = CON_STATE_CLOSED;
3985 + goto out_unlock;
3986 + }
3987 +
3988 if (con->in_msg) {
3989 + BUG_ON(con->in_msg->con != con);
3990 + con->in_msg->con = NULL;
3991 ceph_msg_put(con->in_msg);
3992 con->in_msg = NULL;
3993 + con->ops->put(con);
3994 }
3995
3996 /* Requeue anything that hasn't been acked */
3997 @@ -2175,12 +2383,13 @@ static void ceph_fault(struct ceph_connection *con)
3998 /* If there are no messages queued or keepalive pending, place
3999 * the connection in a STANDBY state */
4000 if (list_empty(&con->out_queue) &&
4001 - !test_bit(KEEPALIVE_PENDING, &con->state)) {
4002 + !test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)) {
4003 dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
4004 - clear_bit(WRITE_PENDING, &con->state);
4005 - set_bit(STANDBY, &con->state);
4006 + clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
4007 + con->state = CON_STATE_STANDBY;
4008 } else {
4009 /* retry after a delay. */
4010 + con->state = CON_STATE_PREOPEN;
4011 if (con->delay == 0)
4012 con->delay = BASE_DELAY_INTERVAL;
4013 else if (con->delay < MAX_DELAY_INTERVAL)
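
The retry branch above keeps the existing exponential backoff: a zeroed delay restarts at BASE_DELAY_INTERVAL and otherwise doubles while below MAX_DELAY_INTERVAL (so it may overshoot the cap once and then hold), and the earlier "reset backoff memory" hunk zeroes it again after a successful handshake. The progression, modeled with illustrative units rather than jiffies:

    #include <stdio.h>

    #define BASE_DELAY_INTERVAL 1       /* illustrative units, not jiffies */
    #define MAX_DELAY_INTERVAL  (5 * 60)

    static unsigned long next_delay(unsigned long delay)
    {
            if (delay == 0)
                    return BASE_DELAY_INTERVAL;
            if (delay < MAX_DELAY_INTERVAL)
                    return delay * 2;   /* may overshoot the cap once */
            return delay;               /* then holds */
    }

    int main(void)
    {
            unsigned long d = 0;
            int i;

            for (i = 0; i < 12; i++) {
                    d = next_delay(d);
                    printf("%lu ", d);  /* 1 2 4 ... 256 512 512 512 */
            }
            printf("\n");
            return 0;
    }
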
4014 @@ -2201,13 +2410,12 @@ static void ceph_fault(struct ceph_connection *con)
4015 * that when con_work restarts we schedule the
4016 * delay then.
4017 */
4018 - set_bit(BACKOFF, &con->state);
4019 + set_bit(CON_FLAG_BACKOFF, &con->flags);
4020 }
4021 }
4022
4023 out_unlock:
4024 mutex_unlock(&con->mutex);
4025 -out:
4026 /*
4027 * in case we faulted due to authentication, invalidate our
4028 * current tickets so that we can get new ones.
4029 @@ -2224,18 +2432,14 @@ out:
4030
4031
4032 /*
4033 - * create a new messenger instance
4034 + * initialize a new messenger instance
4035 */
4036 -struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
4037 - u32 supported_features,
4038 - u32 required_features)
4039 +void ceph_messenger_init(struct ceph_messenger *msgr,
4040 + struct ceph_entity_addr *myaddr,
4041 + u32 supported_features,
4042 + u32 required_features,
4043 + bool nocrc)
4044 {
4045 - struct ceph_messenger *msgr;
4046 -
4047 - msgr = kzalloc(sizeof(*msgr), GFP_KERNEL);
4048 - if (msgr == NULL)
4049 - return ERR_PTR(-ENOMEM);
4050 -
4051 msgr->supported_features = supported_features;
4052 msgr->required_features = required_features;
4053
4054 @@ -2248,30 +2452,23 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
4055 msgr->inst.addr.type = 0;
4056 get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
4057 encode_my_addr(msgr);
4058 + msgr->nocrc = nocrc;
4059
4060 - dout("messenger_create %p\n", msgr);
4061 - return msgr;
4062 -}
4063 -EXPORT_SYMBOL(ceph_messenger_create);
4064 + atomic_set(&msgr->stopping, 0);
4065
4066 -void ceph_messenger_destroy(struct ceph_messenger *msgr)
4067 -{
4068 - dout("destroy %p\n", msgr);
4069 - kfree(msgr);
4070 - dout("destroyed messenger %p\n", msgr);
4071 + dout("%s %p\n", __func__, msgr);
4072 }
4073 -EXPORT_SYMBOL(ceph_messenger_destroy);
4074 +EXPORT_SYMBOL(ceph_messenger_init);
4075
4076 static void clear_standby(struct ceph_connection *con)
4077 {
4078 /* come back from STANDBY? */
4079 - if (test_and_clear_bit(STANDBY, &con->state)) {
4080 - mutex_lock(&con->mutex);
4081 + if (con->state == CON_STATE_STANDBY) {
4082 dout("clear_standby %p and ++connect_seq\n", con);
4083 + con->state = CON_STATE_PREOPEN;
4084 con->connect_seq++;
4085 - WARN_ON(test_bit(WRITE_PENDING, &con->state));
4086 - WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state));
4087 - mutex_unlock(&con->mutex);
4088 + WARN_ON(test_bit(CON_FLAG_WRITE_PENDING, &con->flags));
4089 + WARN_ON(test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags));
4090 }
4091 }
4092
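
ceph_messenger_create()/ceph_messenger_destroy() are replaced above by ceph_messenger_init() on a messenger embedded in its client, removing a kmalloc() that could fail and the ERR_PTR path that went with it. A sketch of the embed-and-init shape, with the fields abbreviated:

    #include <stdio.h>
    #include <string.h>

    struct ceph_messenger {             /* fields abbreviated */
            unsigned supported, required;
            int nocrc;
    };

    struct ceph_client {
            struct ceph_messenger msgr; /* embedded, not pointed to */
    };

    static void messenger_init(struct ceph_messenger *msgr,
                               unsigned supported, unsigned required,
                               int nocrc)
    {
            memset(msgr, 0, sizeof(*msgr));
            msgr->supported = supported;
            msgr->required = required;
            msgr->nocrc = nocrc;        /* nothing here can fail */
    }

    int main(void)
    {
            struct ceph_client client;

            messenger_init(&client.msgr, 0xff, 0x01, 0);
            printf("nocrc=%d\n", client.msgr.nocrc);
            return 0;
    }
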
4093 @@ -2280,21 +2477,24 @@ static void clear_standby(struct ceph_connection *con)
4094 */
4095 void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
4096 {
4097 - if (test_bit(CLOSED, &con->state)) {
4098 - dout("con_send %p closed, dropping %p\n", con, msg);
4099 - ceph_msg_put(msg);
4100 - return;
4101 - }
4102 -
4103 /* set src+dst */
4104 msg->hdr.src = con->msgr->inst.name;
4105 -
4106 BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
4107 -
4108 msg->needs_out_seq = true;
4109
4110 - /* queue */
4111 mutex_lock(&con->mutex);
4112 +
4113 + if (con->state == CON_STATE_CLOSED) {
4114 + dout("con_send %p closed, dropping %p\n", con, msg);
4115 + ceph_msg_put(msg);
4116 + mutex_unlock(&con->mutex);
4117 + return;
4118 + }
4119 +
4120 + BUG_ON(msg->con != NULL);
4121 + msg->con = con->ops->get(con);
4122 + BUG_ON(msg->con == NULL);
4123 +
4124 BUG_ON(!list_empty(&msg->list_head));
4125 list_add_tail(&msg->list_head, &con->out_queue);
4126 dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
4127 @@ -2303,12 +2503,13 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
4128 le32_to_cpu(msg->hdr.front_len),
4129 le32_to_cpu(msg->hdr.middle_len),
4130 le32_to_cpu(msg->hdr.data_len));
4131 +
4132 + clear_standby(con);
4133 mutex_unlock(&con->mutex);
4134
4135 /* if there wasn't anything waiting to send before, queue
4136 * new work */
4137 - clear_standby(con);
4138 - if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
4139 + if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
4140 queue_con(con);
4141 }
4142 EXPORT_SYMBOL(ceph_con_send);
4143 @@ -2316,24 +2517,34 @@ EXPORT_SYMBOL(ceph_con_send);
4144 /*
4145 * Revoke a message that was previously queued for send
4146 */
4147 -void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
4148 +void ceph_msg_revoke(struct ceph_msg *msg)
4149 {
4150 + struct ceph_connection *con = msg->con;
4151 +
4152 + if (!con)
4153 + return; /* Message not in our possession */
4154 +
4155 mutex_lock(&con->mutex);
4156 if (!list_empty(&msg->list_head)) {
4157 - dout("con_revoke %p msg %p - was on queue\n", con, msg);
4158 + dout("%s %p msg %p - was on queue\n", __func__, con, msg);
4159 list_del_init(&msg->list_head);
4160 - ceph_msg_put(msg);
4161 + BUG_ON(msg->con == NULL);
4162 + msg->con->ops->put(msg->con);
4163 + msg->con = NULL;
4164 msg->hdr.seq = 0;
4165 +
4166 + ceph_msg_put(msg);
4167 }
4168 if (con->out_msg == msg) {
4169 - dout("con_revoke %p msg %p - was sending\n", con, msg);
4170 + dout("%s %p msg %p - was sending\n", __func__, con, msg);
4171 con->out_msg = NULL;
4172 if (con->out_kvec_is_msg) {
4173 con->out_skip = con->out_kvec_bytes;
4174 con->out_kvec_is_msg = false;
4175 }
4176 - ceph_msg_put(msg);
4177 msg->hdr.seq = 0;
4178 +
4179 + ceph_msg_put(msg);
4180 }
4181 mutex_unlock(&con->mutex);
4182 }
4183 @@ -2341,17 +2552,27 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
4184 /*
4185 * Revoke a message that we may be reading data into
4186 */
4187 -void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
4188 +void ceph_msg_revoke_incoming(struct ceph_msg *msg)
4189 {
4190 + struct ceph_connection *con;
4191 +
4192 + BUG_ON(msg == NULL);
4193 + if (!msg->con) {
4194 + dout("%s msg %p null con\n", __func__, msg);
4195 +
4196 + return; /* Message not in our possession */
4197 + }
4198 +
4199 + con = msg->con;
4200 mutex_lock(&con->mutex);
4201 - if (con->in_msg && con->in_msg == msg) {
4202 + if (con->in_msg == msg) {
4203 unsigned front_len = le32_to_cpu(con->in_hdr.front_len);
4204 unsigned middle_len = le32_to_cpu(con->in_hdr.middle_len);
4205 unsigned data_len = le32_to_cpu(con->in_hdr.data_len);
4206
4207 /* skip rest of message */
4208 - dout("con_revoke_pages %p msg %p revoked\n", con, msg);
4209 - con->in_base_pos = con->in_base_pos -
4210 + dout("%s %p msg %p revoked\n", __func__, con, msg);
4211 + con->in_base_pos = con->in_base_pos -
4212 sizeof(struct ceph_msg_header) -
4213 front_len -
4214 middle_len -
4215 @@ -2362,8 +2583,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
4216 con->in_tag = CEPH_MSGR_TAG_READY;
4217 con->in_seq++;
4218 } else {
4219 - dout("con_revoke_pages %p msg %p pages %p no-op\n",
4220 - con, con->in_msg, msg);
4221 + dout("%s %p in_msg %p msg %p no-op\n",
4222 + __func__, con, con->in_msg, msg);
4223 }
4224 mutex_unlock(&con->mutex);
4225 }
4226 @@ -2374,9 +2595,11 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
4227 void ceph_con_keepalive(struct ceph_connection *con)
4228 {
4229 dout("con_keepalive %p\n", con);
4230 + mutex_lock(&con->mutex);
4231 clear_standby(con);
4232 - if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
4233 - test_and_set_bit(WRITE_PENDING, &con->state) == 0)
4234 + mutex_unlock(&con->mutex);
4235 + if (test_and_set_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags) == 0 &&
4236 + test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
4237 queue_con(con);
4238 }
4239 EXPORT_SYMBOL(ceph_con_keepalive);
4240 @@ -2395,6 +2618,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
4241 if (m == NULL)
4242 goto out;
4243 kref_init(&m->kref);
4244 +
4245 + m->con = NULL;
4246 INIT_LIST_HEAD(&m->list_head);
4247
4248 m->hdr.tid = 0;
4249 @@ -2490,46 +2715,78 @@ static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg)
4250 }
4251
4252 /*
4253 - * Generic message allocator, for incoming messages.
4254 + * Allocate a message for receiving an incoming message on a
4255 + * connection, and save the result in con->in_msg. Uses the
4256 + * connection's private alloc_msg op if available.
4257 + *
4258 + * Returns 0 on success, or a negative error code.
4259 + *
4260 + * On success, if we set *skip = 1:
4261 + * - the next message should be skipped and ignored.
4262 + * - con->in_msg == NULL
4263 + * or if we set *skip = 0:
4264 + * - con->in_msg is non-null.
4265 + * On error (ENOMEM, EAGAIN, ...),
4266 + * - con->in_msg == NULL
4267 */
4268 -static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
4269 - struct ceph_msg_header *hdr,
4270 - int *skip)
4271 +static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
4272 {
4273 + struct ceph_msg_header *hdr = &con->in_hdr;
4274 int type = le16_to_cpu(hdr->type);
4275 int front_len = le32_to_cpu(hdr->front_len);
4276 int middle_len = le32_to_cpu(hdr->middle_len);
4277 - struct ceph_msg *msg = NULL;
4278 - int ret;
4279 + int ret = 0;
4280 +
4281 + BUG_ON(con->in_msg != NULL);
4282
4283 if (con->ops->alloc_msg) {
4284 + struct ceph_msg *msg;
4285 +
4286 mutex_unlock(&con->mutex);
4287 msg = con->ops->alloc_msg(con, hdr, skip);
4288 mutex_lock(&con->mutex);
4289 - if (!msg || *skip)
4290 - return NULL;
4291 + if (con->state != CON_STATE_OPEN) {
4292 + if (msg)
4293 + ceph_msg_put(msg);
4294 + return -EAGAIN;
4295 + }
4296 + con->in_msg = msg;
4297 + if (con->in_msg) {
4298 + con->in_msg->con = con->ops->get(con);
4299 + BUG_ON(con->in_msg->con == NULL);
4300 + }
4301 + if (*skip) {
4302 + con->in_msg = NULL;
4303 + return 0;
4304 + }
4305 + if (!con->in_msg) {
4306 + con->error_msg =
4307 + "error allocating memory for incoming message";
4308 + return -ENOMEM;
4309 + }
4310 }
4311 - if (!msg) {
4312 - *skip = 0;
4313 - msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
4314 - if (!msg) {
4315 + if (!con->in_msg) {
4316 + con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
4317 + if (!con->in_msg) {
4318 pr_err("unable to allocate msg type %d len %d\n",
4319 type, front_len);
4320 - return NULL;
4321 + return -ENOMEM;
4322 }
4323 - msg->page_alignment = le16_to_cpu(hdr->data_off);
4324 + con->in_msg->con = con->ops->get(con);
4325 + BUG_ON(con->in_msg->con == NULL);
4326 + con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
4327 }
4328 - memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
4329 + memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
4330
4331 - if (middle_len && !msg->middle) {
4332 - ret = ceph_alloc_middle(con, msg);
4333 + if (middle_len && !con->in_msg->middle) {
4334 + ret = ceph_alloc_middle(con, con->in_msg);
4335 if (ret < 0) {
4336 - ceph_msg_put(msg);
4337 - return NULL;
4338 + ceph_msg_put(con->in_msg);
4339 + con->in_msg = NULL;
4340 }
4341 }
4342
4343 - return msg;
4344 + return ret;
4345 }
4346
4347
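
The comment block above spells out the ceph_con_in_msg_alloc() contract. A compact userspace model of that rc/skip protocol, with allocation reduced to malloc() and the message reduced to a stub:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct msg { int type; };           /* stub message */

    /* rc < 0: error, *out stays NULL; rc == 0 and *skip: drop the
     * incoming message; rc == 0 and !*skip: *out is ready to fill */
    static int in_msg_alloc(struct msg **out, int type, int *skip)
    {
            *out = NULL;
            *skip = 0;
            if (type == 0) {            /* a message we choose to ignore */
                    *skip = 1;
                    return 0;
            }
            *out = malloc(sizeof(**out));
            if (!*out)
                    return -ENOMEM;
            (*out)->type = type;
            return 0;
    }

    int main(void)
    {
            struct msg *m;
            int skip;

            if (in_msg_alloc(&m, 42, &skip) == 0 && !skip)
                    printf("got msg type %d\n", m->type);
            free(m);
            return 0;
    }
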
4348 diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
4349 index 1845cde..89a6409 100644
4350 --- a/net/ceph/mon_client.c
4351 +++ b/net/ceph/mon_client.c
4352 @@ -106,9 +106,9 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
4353 monc->pending_auth = 1;
4354 monc->m_auth->front.iov_len = len;
4355 monc->m_auth->hdr.front_len = cpu_to_le32(len);
4356 - ceph_con_revoke(monc->con, monc->m_auth);
4357 + ceph_msg_revoke(monc->m_auth);
4358 ceph_msg_get(monc->m_auth); /* keep our ref */
4359 - ceph_con_send(monc->con, monc->m_auth);
4360 + ceph_con_send(&monc->con, monc->m_auth);
4361 }
4362
4363 /*
4364 @@ -117,8 +117,11 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
4365 static void __close_session(struct ceph_mon_client *monc)
4366 {
4367 dout("__close_session closing mon%d\n", monc->cur_mon);
4368 - ceph_con_revoke(monc->con, monc->m_auth);
4369 - ceph_con_close(monc->con);
4370 + ceph_msg_revoke(monc->m_auth);
4371 + ceph_msg_revoke_incoming(monc->m_auth_reply);
4372 + ceph_msg_revoke(monc->m_subscribe);
4373 + ceph_msg_revoke_incoming(monc->m_subscribe_ack);
4374 + ceph_con_close(&monc->con);
4375 monc->cur_mon = -1;
4376 monc->pending_auth = 0;
4377 ceph_auth_reset(monc->auth);
4378 @@ -142,9 +145,8 @@ static int __open_session(struct ceph_mon_client *monc)
4379 monc->want_next_osdmap = !!monc->want_next_osdmap;
4380
4381 dout("open_session mon%d opening\n", monc->cur_mon);
4382 - monc->con->peer_name.type = CEPH_ENTITY_TYPE_MON;
4383 - monc->con->peer_name.num = cpu_to_le64(monc->cur_mon);
4384 - ceph_con_open(monc->con,
4385 + ceph_con_open(&monc->con,
4386 + CEPH_ENTITY_TYPE_MON, monc->cur_mon,
4387 &monc->monmap->mon_inst[monc->cur_mon].addr);
4388
4389 /* initiatiate authentication handshake */
4390 @@ -226,8 +228,8 @@ static void __send_subscribe(struct ceph_mon_client *monc)
4391
4392 msg->front.iov_len = p - msg->front.iov_base;
4393 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
4394 - ceph_con_revoke(monc->con, msg);
4395 - ceph_con_send(monc->con, ceph_msg_get(msg));
4396 + ceph_msg_revoke(msg);
4397 + ceph_con_send(&monc->con, ceph_msg_get(msg));
4398
4399 monc->sub_sent = jiffies | 1; /* never 0 */
4400 }
4401 @@ -247,7 +249,7 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
4402 if (monc->hunting) {
4403 pr_info("mon%d %s session established\n",
4404 monc->cur_mon,
4405 - ceph_pr_addr(&monc->con->peer_addr.in_addr));
4406 + ceph_pr_addr(&monc->con.peer_addr.in_addr));
4407 monc->hunting = false;
4408 }
4409 dout("handle_subscribe_ack after %d seconds\n", seconds);
4410 @@ -309,6 +311,17 @@ int ceph_monc_open_session(struct ceph_mon_client *monc)
4411 EXPORT_SYMBOL(ceph_monc_open_session);
4412
4413 /*
4414 + * We require the fsid and global_id in order to initialize our
4415 + * debugfs dir.
4416 + */
4417 +static bool have_debugfs_info(struct ceph_mon_client *monc)
4418 +{
4419 + dout("have_debugfs_info fsid %d globalid %lld\n",
4420 + (int)monc->client->have_fsid, monc->auth->global_id);
4421 + return monc->client->have_fsid && monc->auth->global_id > 0;
4422 +}
4423 +
4424 +/*
4425 * The monitor responds with mount ack indicate mount success. The
4426 * included client ticket allows the client to talk to MDSs and OSDs.
4427 */
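
have_debugfs_info() above gates debugfs setup on both pieces arriving, the fsid (from the monmap) and a nonzero global_id (from the auth reply), with the snapshot taken under the mutex and the one-time init performed after unlocking to avoid a locking dependency. A sketch of that snapshot-then-init pattern:

    #include <stdio.h>

    struct monc {                       /* stub mon_client */
            int have_fsid;              /* learned from the monmap */
            long long global_id;        /* learned from the auth reply */
    };

    static int have_debugfs_info(const struct monc *m)
    {
            return m->have_fsid && m->global_id > 0;
    }

    int main(void)
    {
            struct monc m = { 1, 0 };
            int had = have_debugfs_info(&m);    /* snapshot, under mutex */

            m.global_id = 4100;                 /* auth reply arrives */
            /* after unlocking: init exactly once, on the 0 -> 1 edge */
            if (!had && have_debugfs_info(&m))
                    printf("client%lld: init debugfs\n", m.global_id);
            return 0;
    }
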
4428 @@ -318,9 +331,12 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
4429 struct ceph_client *client = monc->client;
4430 struct ceph_monmap *monmap = NULL, *old = monc->monmap;
4431 void *p, *end;
4432 + int had_debugfs_info, init_debugfs = 0;
4433
4434 mutex_lock(&monc->mutex);
4435
4436 + had_debugfs_info = have_debugfs_info(monc);
4437 +
4438 dout("handle_monmap\n");
4439 p = msg->front.iov_base;
4440 end = p + msg->front.iov_len;
4441 @@ -342,12 +358,22 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
4442
4443 if (!client->have_fsid) {
4444 client->have_fsid = true;
4445 + if (!had_debugfs_info && have_debugfs_info(monc)) {
4446 + pr_info("client%lld fsid %pU\n",
4447 + ceph_client_id(monc->client),
4448 + &monc->client->fsid);
4449 + init_debugfs = 1;
4450 + }
4451 mutex_unlock(&monc->mutex);
4452 - /*
4453 - * do debugfs initialization without mutex to avoid
4454 - * creating a locking dependency
4455 - */
4456 - ceph_debugfs_client_init(client);
4457 +
4458 + if (init_debugfs) {
4459 + /*
4460 + * do debugfs initialization without mutex to avoid
4461 + * creating a locking dependency
4462 + */
4463 + ceph_debugfs_client_init(monc->client);
4464 + }
4465 +
4466 goto out_unlocked;
4467 }
4468 out:
4469 @@ -439,6 +465,7 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
4470 m = NULL;
4471 } else {
4472 dout("get_generic_reply %lld got %p\n", tid, req->reply);
4473 + *skip = 0;
4474 m = ceph_msg_get(req->reply);
4475 /*
4476 * we don't need to track the connection reading into
4477 @@ -461,7 +488,7 @@ static int do_generic_request(struct ceph_mon_client *monc,
4478 req->request->hdr.tid = cpu_to_le64(req->tid);
4479 __insert_generic_request(monc, req);
4480 monc->num_generic_requests++;
4481 - ceph_con_send(monc->con, ceph_msg_get(req->request));
4482 + ceph_con_send(&monc->con, ceph_msg_get(req->request));
4483 mutex_unlock(&monc->mutex);
4484
4485 err = wait_for_completion_interruptible(&req->completion);
4486 @@ -684,8 +711,9 @@ static void __resend_generic_request(struct ceph_mon_client *monc)
4487
4488 for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) {
4489 req = rb_entry(p, struct ceph_mon_generic_request, node);
4490 - ceph_con_revoke(monc->con, req->request);
4491 - ceph_con_send(monc->con, ceph_msg_get(req->request));
4492 + ceph_msg_revoke(req->request);
4493 + ceph_msg_revoke_incoming(req->reply);
4494 + ceph_con_send(&monc->con, ceph_msg_get(req->request));
4495 }
4496 }
4497
4498 @@ -705,7 +733,7 @@ static void delayed_work(struct work_struct *work)
4499 __close_session(monc);
4500 __open_session(monc); /* continue hunting */
4501 } else {
4502 - ceph_con_keepalive(monc->con);
4503 + ceph_con_keepalive(&monc->con);
4504
4505 __validate_auth(monc);
4506
4507 @@ -760,19 +788,12 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
4508 goto out;
4509
4510 /* connection */
4511 - monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL);
4512 - if (!monc->con)
4513 - goto out_monmap;
4514 - ceph_con_init(monc->client->msgr, monc->con);
4515 - monc->con->private = monc;
4516 - monc->con->ops = &mon_con_ops;
4517 -
4518 /* authentication */
4519 monc->auth = ceph_auth_init(cl->options->name,
4520 cl->options->key);
4521 if (IS_ERR(monc->auth)) {
4522 err = PTR_ERR(monc->auth);
4523 - goto out_con;
4524 + goto out_monmap;
4525 }
4526 monc->auth->want_keys =
4527 CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON |
4528 @@ -801,6 +822,9 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
4529 if (!monc->m_auth)
4530 goto out_auth_reply;
4531
4532 + ceph_con_init(&monc->con, monc, &mon_con_ops,
4533 + &monc->client->msgr);
4534 +
4535 monc->cur_mon = -1;
4536 monc->hunting = true;
4537 monc->sub_renew_after = jiffies;
4538 @@ -824,8 +848,6 @@ out_subscribe_ack:
4539 ceph_msg_put(monc->m_subscribe_ack);
4540 out_auth:
4541 ceph_auth_destroy(monc->auth);
4542 -out_con:
4543 - monc->con->ops->put(monc->con);
4544 out_monmap:
4545 kfree(monc->monmap);
4546 out:
4547 @@ -841,12 +863,16 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
4548 mutex_lock(&monc->mutex);
4549 __close_session(monc);
4550
4551 - monc->con->private = NULL;
4552 - monc->con->ops->put(monc->con);
4553 - monc->con = NULL;
4554 -
4555 mutex_unlock(&monc->mutex);
4556
4557 + /*
4558 + * flush msgr queue before we destroy ourselves to ensure that:
4559 + * - any work that references our embedded con is finished.
4560 + * - any osd_client or other work that may reference an authorizer
4561 + * finishes before we shut down the auth subsystem.
4562 + */
4563 + ceph_msgr_flush();
4564 +
4565 ceph_auth_destroy(monc->auth);
4566
4567 ceph_msg_put(monc->m_auth);
4568 @@ -863,8 +889,10 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
4569 {
4570 int ret;
4571 int was_auth = 0;
4572 + int had_debugfs_info, init_debugfs = 0;
4573
4574 mutex_lock(&monc->mutex);
4575 + had_debugfs_info = have_debugfs_info(monc);
4576 if (monc->auth->ops)
4577 was_auth = monc->auth->ops->is_authenticated(monc->auth);
4578 monc->pending_auth = 0;
4579 @@ -880,14 +908,29 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
4580 } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
4581 dout("authenticated, starting session\n");
4582
4583 - monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
4584 - monc->client->msgr->inst.name.num =
4585 + monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
4586 + monc->client->msgr.inst.name.num =
4587 cpu_to_le64(monc->auth->global_id);
4588
4589 __send_subscribe(monc);
4590 __resend_generic_request(monc);
4591 }
4592 +
4593 + if (!had_debugfs_info && have_debugfs_info(monc)) {
4594 + pr_info("client%lld fsid %pU\n",
4595 + ceph_client_id(monc->client),
4596 + &monc->client->fsid);
4597 + init_debugfs = 1;
4598 + }
4599 mutex_unlock(&monc->mutex);
4600 +
4601 + if (init_debugfs) {
4602 + /*
4603 + * do debugfs initialization without mutex to avoid
4604 + * creating a locking dependency
4605 + */
4606 + ceph_debugfs_client_init(monc->client);
4607 + }
4608 }
4609
4610 static int __validate_auth(struct ceph_mon_client *monc)
4611 @@ -992,6 +1035,8 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
4612 case CEPH_MSG_MDS_MAP:
4613 case CEPH_MSG_OSD_MAP:
4614 m = ceph_msg_new(type, front_len, GFP_NOFS, false);
4615 + if (!m)
4616 + return NULL; /* ENOMEM--return skip == 0 */
4617 break;
4618 }
4619
4620 @@ -1021,7 +1066,7 @@ static void mon_fault(struct ceph_connection *con)
4621 if (!monc->hunting)
4622 pr_info("mon%d %s session lost, "
4623 "hunting for new mon\n", monc->cur_mon,
4624 - ceph_pr_addr(&monc->con->peer_addr.in_addr));
4625 + ceph_pr_addr(&monc->con.peer_addr.in_addr));
4626
4627 __close_session(monc);
4628 if (!monc->hunting) {
4629 @@ -1036,9 +1081,23 @@ out:
4630 mutex_unlock(&monc->mutex);
4631 }
4632
4633 +/*
4634 + * We can ignore refcounting on the connection struct, as all references
4635 + * will come from the messenger workqueue, which is drained prior to
4636 + * mon_client destruction.
4637 + */
4638 +static struct ceph_connection *con_get(struct ceph_connection *con)
4639 +{
4640 + return con;
4641 +}
4642 +
4643 +static void con_put(struct ceph_connection *con)
4644 +{
4645 +}
4646 +
4647 static const struct ceph_connection_operations mon_con_ops = {
4648 - .get = ceph_con_get,
4649 - .put = ceph_con_put,
4650 + .get = con_get,
4651 + .put = con_put,
4652 .dispatch = dispatch,
4653 .fault = mon_fault,
4654 .alloc_msg = mon_alloc_msg,
4655 diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
4656 index 11d5f41..ddec1c1 100644
4657 --- a/net/ceph/msgpool.c
4658 +++ b/net/ceph/msgpool.c
4659 @@ -12,7 +12,7 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
4660 struct ceph_msgpool *pool = arg;
4661 struct ceph_msg *msg;
4662
4663 - msg = ceph_msg_new(0, pool->front_len, gfp_mask, true);
4664 + msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true);
4665 if (!msg) {
4666 dout("msgpool_alloc %s failed\n", pool->name);
4667 } else {
4668 @@ -32,10 +32,11 @@ static void msgpool_free(void *element, void *arg)
4669 ceph_msg_put(msg);
4670 }
4671
4672 -int ceph_msgpool_init(struct ceph_msgpool *pool,
4673 +int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
4674 int front_len, int size, bool blocking, const char *name)
4675 {
4676 dout("msgpool %s init\n", name);
4677 + pool->type = type;
4678 pool->front_len = front_len;
4679 pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
4680 if (!pool->pool)
4681 @@ -61,7 +62,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
4682 WARN_ON(1);
4683
4684 /* try to alloc a fresh message */
4685 - return ceph_msg_new(0, front_len, GFP_NOFS, false);
4686 + return ceph_msg_new(pool->type, front_len, GFP_NOFS, false);
4687 }
4688
4689 msg = mempool_alloc(pool->pool, GFP_NOFS);
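
The msgpool hunks above store the message type in the pool itself, so both the preallocated mempool path and the ceph_msg_new() fallback stamp the real type instead of 0. A minimal model; the type value is illustrative.

    #include <stdio.h>

    struct msgpool { int type; int front_len; };
    struct msg     { int type; int front_len; };

    /* both the pooled path and the fallback now stamp pool->type */
    static struct msg msgpool_get(const struct msgpool *pool)
    {
            struct msg m = { pool->type, pool->front_len };
            return m;
    }

    int main(void)
    {
            struct msgpool pool = { 42, 512 }; /* type value illustrative */
            struct msg m = msgpool_get(&pool);

            printf("type=%d front_len=%d\n", m.type, m.front_len);
            return 0;
    }
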
4690 diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
4691 index 5e25405..a79dbae 100644
4692 --- a/net/ceph/osd_client.c
4693 +++ b/net/ceph/osd_client.c
4694 @@ -52,7 +52,7 @@ static int op_has_extent(int op)
4695 op == CEPH_OSD_OP_WRITE);
4696 }
4697
4698 -void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
4699 +int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
4700 struct ceph_file_layout *layout,
4701 u64 snapid,
4702 u64 off, u64 *plen, u64 *bno,
4703 @@ -62,12 +62,15 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
4704 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
4705 u64 orig_len = *plen;
4706 u64 objoff, objlen; /* extent in object */
4707 + int r;
4708
4709 reqhead->snapid = cpu_to_le64(snapid);
4710
4711 /* object extent? */
4712 - ceph_calc_file_object_mapping(layout, off, plen, bno,
4713 - &objoff, &objlen);
4714 + r = ceph_calc_file_object_mapping(layout, off, plen, bno,
4715 + &objoff, &objlen);
4716 + if (r < 0)
4717 + return r;
4718 if (*plen < orig_len)
4719 dout(" skipping last %llu, final file extent %llu~%llu\n",
4720 orig_len - *plen, off, *plen);
4721 @@ -83,7 +86,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
4722
4723 dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
4724 *bno, objoff, objlen, req->r_num_pages);
4725 -
4726 + return 0;
4727 }
4728 EXPORT_SYMBOL(ceph_calc_raw_layout);
4729
4730 @@ -112,20 +115,25 @@ EXPORT_SYMBOL(ceph_calc_raw_layout);
4731 *
4732 * fill osd op in request message.
4733 */
4734 -static void calc_layout(struct ceph_osd_client *osdc,
4735 - struct ceph_vino vino,
4736 - struct ceph_file_layout *layout,
4737 - u64 off, u64 *plen,
4738 - struct ceph_osd_request *req,
4739 - struct ceph_osd_req_op *op)
4740 +static int calc_layout(struct ceph_osd_client *osdc,
4741 + struct ceph_vino vino,
4742 + struct ceph_file_layout *layout,
4743 + u64 off, u64 *plen,
4744 + struct ceph_osd_request *req,
4745 + struct ceph_osd_req_op *op)
4746 {
4747 u64 bno;
4748 + int r;
4749
4750 - ceph_calc_raw_layout(osdc, layout, vino.snap, off,
4751 - plen, &bno, req, op);
4752 + r = ceph_calc_raw_layout(osdc, layout, vino.snap, off,
4753 + plen, &bno, req, op);
4754 + if (r < 0)
4755 + return r;
4756
4757 snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
4758 req->r_oid_len = strlen(req->r_oid);
4759 +
4760 + return r;
4761 }
4762
4763 /*
4764 @@ -139,15 +147,14 @@ void ceph_osdc_release_request(struct kref *kref)
4765
4766 if (req->r_request)
4767 ceph_msg_put(req->r_request);
4768 - if (req->r_reply)
4769 - ceph_msg_put(req->r_reply);
4770 if (req->r_con_filling_msg) {
4771 - dout("release_request revoking pages %p from con %p\n",
4772 + dout("%s revoking pages %p from con %p\n", __func__,
4773 req->r_pages, req->r_con_filling_msg);
4774 - ceph_con_revoke_message(req->r_con_filling_msg,
4775 - req->r_reply);
4776 - ceph_con_put(req->r_con_filling_msg);
4777 + ceph_msg_revoke_incoming(req->r_reply);
4778 + req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
4779 }
4780 + if (req->r_reply)
4781 + ceph_msg_put(req->r_reply);
4782 if (req->r_own_pages)
4783 ceph_release_page_vector(req->r_pages,
4784 req->r_num_pages);
4785 @@ -243,6 +250,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
4786 }
4787 ceph_pagelist_init(req->r_trail);
4788 }
4789 +
4790 /* create request message; allow space for oid */
4791 msg_size += MAX_OBJ_NAME_SIZE;
4792 if (snapc)
4793 @@ -256,7 +264,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
4794 return NULL;
4795 }
4796
4797 - msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP);
4798 memset(msg->front.iov_base, 0, msg->front.iov_len);
4799
4800 req->r_request = msg;
4801 @@ -278,7 +285,7 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
4802 {
4803 dst->op = cpu_to_le16(src->op);
4804
4805 - switch (dst->op) {
4806 + switch (src->op) {
4807 case CEPH_OSD_OP_READ:
4808 case CEPH_OSD_OP_WRITE:
4809 dst->extent.offset =
4810 @@ -624,7 +631,7 @@ static void osd_reset(struct ceph_connection *con)
4811 /*
4812 * Track open sessions with osds.
4813 */
4814 -static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
4815 +static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
4816 {
4817 struct ceph_osd *osd;
4818
4819 @@ -634,15 +641,13 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
4820
4821 atomic_set(&osd->o_ref, 1);
4822 osd->o_osdc = osdc;
4823 + osd->o_osd = onum;
4824 INIT_LIST_HEAD(&osd->o_requests);
4825 INIT_LIST_HEAD(&osd->o_linger_requests);
4826 INIT_LIST_HEAD(&osd->o_osd_lru);
4827 osd->o_incarnation = 1;
4828
4829 - ceph_con_init(osdc->client->msgr, &osd->o_con);
4830 - osd->o_con.private = osd;
4831 - osd->o_con.ops = &osd_con_ops;
4832 - osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD;
4833 + ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
4834
4835 INIT_LIST_HEAD(&osd->o_keepalive_item);
4836 return osd;
4837 @@ -664,11 +669,11 @@ static void put_osd(struct ceph_osd *osd)
4838 {
4839 dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
4840 atomic_read(&osd->o_ref) - 1);
4841 - if (atomic_dec_and_test(&osd->o_ref)) {
4842 + if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) {
4843 struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
4844
4845 - if (osd->o_authorizer)
4846 - ac->ops->destroy_authorizer(ac, osd->o_authorizer);
4847 + if (ac->ops && ac->ops->destroy_authorizer)
4848 + ac->ops->destroy_authorizer(ac, osd->o_auth.authorizer);
4849 kfree(osd);
4850 }
4851 }
4852 @@ -752,7 +757,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
4853 ret = -EAGAIN;
4854 } else {
4855 ceph_con_close(&osd->o_con);
4856 - ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]);
4857 + ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd,
4858 + &osdc->osdmap->osd_addr[osd->o_osd]);
4859 osd->o_incarnation++;
4860 }
4861 return ret;
4862 @@ -841,13 +847,19 @@ static void register_request(struct ceph_osd_client *osdc,
4863 static void __unregister_request(struct ceph_osd_client *osdc,
4864 struct ceph_osd_request *req)
4865 {
4866 + if (RB_EMPTY_NODE(&req->r_node)) {
4867 + dout("__unregister_request %p tid %lld not registered\n",
4868 + req, req->r_tid);
4869 + return;
4870 + }
4871 +
4872 dout("__unregister_request %p tid %lld\n", req, req->r_tid);
4873 rb_erase(&req->r_node, &osdc->requests);
4874 osdc->num_requests--;
4875
4876 if (req->r_osd) {
4877 /* make sure the original request isn't in flight. */
4878 - ceph_con_revoke(&req->r_osd->o_con, req->r_request);
4879 + ceph_msg_revoke(req->r_request);
4880
4881 list_del_init(&req->r_osd_item);
4882 if (list_empty(&req->r_osd->o_requests) &&
4883 @@ -874,7 +886,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
4884 static void __cancel_request(struct ceph_osd_request *req)
4885 {
4886 if (req->r_sent && req->r_osd) {
4887 - ceph_con_revoke(&req->r_osd->o_con, req->r_request);
4888 + ceph_msg_revoke(req->r_request);
4889 req->r_sent = 0;
4890 }
4891 }
4892 @@ -884,7 +896,9 @@ static void __register_linger_request(struct ceph_osd_client *osdc,
4893 {
4894 dout("__register_linger_request %p\n", req);
4895 list_add_tail(&req->r_linger_item, &osdc->req_linger);
4896 - list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests);
4897 + if (req->r_osd)
4898 + list_add_tail(&req->r_linger_osd,
4899 + &req->r_osd->o_linger_requests);
4900 }
4901
4902 static void __unregister_linger_request(struct ceph_osd_client *osdc,
4903 @@ -992,18 +1006,18 @@ static int __map_request(struct ceph_osd_client *osdc,
4904 req->r_osd = __lookup_osd(osdc, o);
4905 if (!req->r_osd && o >= 0) {
4906 err = -ENOMEM;
4907 - req->r_osd = create_osd(osdc);
4908 + req->r_osd = create_osd(osdc, o);
4909 if (!req->r_osd) {
4910 list_move(&req->r_req_lru_item, &osdc->req_notarget);
4911 goto out;
4912 }
4913
4914 dout("map_request osd %p is osd%d\n", req->r_osd, o);
4915 - req->r_osd->o_osd = o;
4916 - req->r_osd->o_con.peer_name.num = cpu_to_le64(o);
4917 __insert_osd(osdc, req->r_osd);
4918
4919 - ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]);
4920 + ceph_con_open(&req->r_osd->o_con,
4921 + CEPH_ENTITY_TYPE_OSD, o,
4922 + &osdc->osdmap->osd_addr[o]);
4923 }
4924
4925 if (req->r_osd) {
4926 @@ -1210,7 +1224,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
4927 if (req->r_con_filling_msg == con && req->r_reply == msg) {
4928 dout(" dropping con_filling_msg ref %p\n", con);
4929 req->r_con_filling_msg = NULL;
4930 - ceph_con_put(con);
4931 + con->ops->put(con);
4932 }
4933
4934 if (!req->r_got_reply) {
4935 @@ -1298,8 +1312,9 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
4936
4937 dout("kick_requests %s\n", force_resend ? " (force resend)" : "");
4938 mutex_lock(&osdc->request_mutex);
4939 - for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
4940 + for (p = rb_first(&osdc->requests); p; ) {
4941 req = rb_entry(p, struct ceph_osd_request, r_node);
4942 + p = rb_next(p);
4943 err = __map_request(osdc, req, force_resend);
4944 if (err < 0)
4945 continue; /* error */
4946 @@ -1307,10 +1322,23 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
4947 dout("%p tid %llu maps to no osd\n", req, req->r_tid);
4948 needmap++; /* request a newer map */
4949 } else if (err > 0) {
4950 - dout("%p tid %llu requeued on osd%d\n", req, req->r_tid,
4951 - req->r_osd ? req->r_osd->o_osd : -1);
4952 - if (!req->r_linger)
4953 + if (!req->r_linger) {
4954 + dout("%p tid %llu requeued on osd%d\n", req,
4955 + req->r_tid,
4956 + req->r_osd ? req->r_osd->o_osd : -1);
4957 req->r_flags |= CEPH_OSD_FLAG_RETRY;
4958 + }
4959 + }
4960 + if (req->r_linger && list_empty(&req->r_linger_item)) {
4961 + /*
4962 + * register as a linger so that we will
4963 + * re-submit below and get a new tid
4964 + */
4965 + dout("%p tid %llu restart on osd%d\n",
4966 + req, req->r_tid,
4967 + req->r_osd ? req->r_osd->o_osd : -1);
4968 + __register_linger_request(osdc, req);
4969 + __unregister_request(osdc, req);
4970 }
4971 }
4972
4973 @@ -1385,7 +1413,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
4974 epoch, maplen);
4975 newmap = osdmap_apply_incremental(&p, next,
4976 osdc->osdmap,
4977 - osdc->client->msgr);
4978 + &osdc->client->msgr);
4979 if (IS_ERR(newmap)) {
4980 err = PTR_ERR(newmap);
4981 goto bad;
4982 @@ -1833,11 +1861,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
4983 if (!osdc->req_mempool)
4984 goto out;
4985
4986 - err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true,
4987 + err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
4988 + OSD_OP_FRONT_LEN, 10, true,
4989 "osd_op");
4990 if (err < 0)
4991 goto out_mempool;
4992 - err = ceph_msgpool_init(&osdc->msgpool_op_reply,
4993 + err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY,
4994 OSD_OPREPLY_FRONT_LEN, 10, true,
4995 "osd_op_reply");
4996 if (err < 0)
4997 @@ -2019,10 +2048,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
4998 }
4999
5000 if (req->r_con_filling_msg) {
5001 - dout("get_reply revoking msg %p from old con %p\n",
5002 + dout("%s revoking msg %p from old con %p\n", __func__,
5003 req->r_reply, req->r_con_filling_msg);
5004 - ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply);
5005 - ceph_con_put(req->r_con_filling_msg);
5006 + ceph_msg_revoke_incoming(req->r_reply);
5007 + req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
5008 req->r_con_filling_msg = NULL;
5009 }
5010
5011 @@ -2057,7 +2086,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
5012 #endif
5013 }
5014 *skip = 0;
5015 - req->r_con_filling_msg = ceph_con_get(con);
5016 + req->r_con_filling_msg = con->ops->get(con);
5017 dout("get_reply tid %lld %p\n", tid, m);
5018
5019 out:
5020 @@ -2074,6 +2103,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
5021 int type = le16_to_cpu(hdr->type);
5022 int front = le32_to_cpu(hdr->front_len);
5023
5024 + *skip = 0;
5025 switch (type) {
5026 case CEPH_MSG_OSD_MAP:
5027 case CEPH_MSG_WATCH_NOTIFY:
5028 @@ -2108,37 +2138,32 @@ static void put_osd_con(struct ceph_connection *con)
5029 /*
5030 * authentication
5031 */
5032 -static int get_authorizer(struct ceph_connection *con,
5033 - void **buf, int *len, int *proto,
5034 - void **reply_buf, int *reply_len, int force_new)
5035 +/*
5036 + * Note: returned pointer is the address of a structure that's
5037 + * managed separately. Caller must *not* attempt to free it.
5038 + */
5039 +static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
5040 + int *proto, int force_new)
5041 {
5042 struct ceph_osd *o = con->private;
5043 struct ceph_osd_client *osdc = o->o_osdc;
5044 struct ceph_auth_client *ac = osdc->client->monc.auth;
5045 - int ret = 0;
5046 + struct ceph_auth_handshake *auth = &o->o_auth;
5047
5048 - if (force_new && o->o_authorizer) {
5049 - ac->ops->destroy_authorizer(ac, o->o_authorizer);
5050 - o->o_authorizer = NULL;
5051 - }
5052 - if (o->o_authorizer == NULL) {
5053 - ret = ac->ops->create_authorizer(
5054 - ac, CEPH_ENTITY_TYPE_OSD,
5055 - &o->o_authorizer,
5056 - &o->o_authorizer_buf,
5057 - &o->o_authorizer_buf_len,
5058 - &o->o_authorizer_reply_buf,
5059 - &o->o_authorizer_reply_buf_len);
5060 + if (force_new && auth->authorizer) {
5061 + if (ac->ops && ac->ops->destroy_authorizer)
5062 + ac->ops->destroy_authorizer(ac, auth->authorizer);
5063 + auth->authorizer = NULL;
5064 + }
5065 + if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) {
5066 + int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
5067 + auth);
5068 if (ret)
5069 - return ret;
5070 + return ERR_PTR(ret);
5071 }
5072 -
5073 *proto = ac->protocol;
5074 - *buf = o->o_authorizer_buf;
5075 - *len = o->o_authorizer_buf_len;
5076 - *reply_buf = o->o_authorizer_reply_buf;
5077 - *reply_len = o->o_authorizer_reply_buf_len;
5078 - return 0;
5079 +
5080 + return auth;
5081 }
5082
5083
5084 @@ -2148,7 +2173,11 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
5085 struct ceph_osd_client *osdc = o->o_osdc;
5086 struct ceph_auth_client *ac = osdc->client->monc.auth;
5087
5088 - return ac->ops->verify_authorizer_reply(ac, o->o_authorizer, len);
5089 + /*
5090 + * XXX If ac->ops or ac->ops->verify_authorizer_reply is null,
5091 + * XXX which do we do: succeed or fail?
5092 + */
5093 + return ac->ops->verify_authorizer_reply(ac, o->o_auth.authorizer, len);
5094 }
5095
5096 static int invalidate_authorizer(struct ceph_connection *con)
5097 @@ -2157,7 +2186,7 @@ static int invalidate_authorizer(struct ceph_connection *con)
5098 struct ceph_osd_client *osdc = o->o_osdc;
5099 struct ceph_auth_client *ac = osdc->client->monc.auth;
5100
5101 - if (ac->ops->invalidate_authorizer)
5102 + if (ac->ops && ac->ops->invalidate_authorizer)
5103 ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
5104
5105 return ceph_monc_validate_auth(&osdc->client->monc);
5106 diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
5107 index 29ad46e..430076e 100644
5108 --- a/net/ceph/osdmap.c
5109 +++ b/net/ceph/osdmap.c
5110 @@ -495,15 +495,16 @@ static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
5111 ceph_decode_32_safe(p, end, pool, bad);
5112 ceph_decode_32_safe(p, end, len, bad);
5113 dout(" pool %d len %d\n", pool, len);
5114 + ceph_decode_need(p, end, len, bad);
5115 pi = __lookup_pg_pool(&map->pg_pools, pool);
5116 if (pi) {
5117 + char *name = kstrndup(*p, len, GFP_NOFS);
5118 +
5119 + if (!name)
5120 + return -ENOMEM;
5121 kfree(pi->name);
5122 - pi->name = kmalloc(len + 1, GFP_NOFS);
5123 - if (pi->name) {
5124 - memcpy(pi->name, *p, len);
5125 - pi->name[len] = '\0';
5126 - dout(" name is %s\n", pi->name);
5127 - }
5128 + pi->name = name;
5129 + dout(" name is %s\n", pi->name);
5130 }
5131 *p += len;
5132 }
5133 @@ -673,6 +674,9 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
5134 ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
5135 ceph_decode_copy(p, &pgid, sizeof(pgid));
5136 n = ceph_decode_32(p);
5137 + err = -EINVAL;
5138 + if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
5139 + goto bad;
5140 ceph_decode_need(p, end, n * sizeof(u32), bad);
5141 err = -ENOMEM;
5142 pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS);
5143 @@ -890,8 +894,16 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
5144 pglen = ceph_decode_32(p);
5145
5146 if (pglen) {
5147 - /* insert */
5148 ceph_decode_need(p, end, pglen*sizeof(u32), bad);
5149 +
5150 + /* removing existing (if any) */
5151 + (void) __remove_pg_mapping(&map->pg_temp, pgid);
5152 +
5153 + /* insert */
5154 + if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) {
5155 + err = -EINVAL;
5156 + goto bad;
5157 + }
5158 pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
5159 if (!pg) {
5160 err = -ENOMEM;
5161 @@ -940,7 +952,7 @@ bad:
5162 * for now, we write only a single su, until we can
5163 * pass a stride back to the caller.
5164 */
5165 -void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
5166 +int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
5167 u64 off, u64 *plen,
5168 u64 *ono,
5169 u64 *oxoff, u64 *oxlen)
5170 @@ -954,11 +966,17 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
5171
5172 dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen,
5173 osize, su);
5174 + if (su == 0 || sc == 0)
5175 + goto invalid;
5176 su_per_object = osize / su;
5177 + if (su_per_object == 0)
5178 + goto invalid;
5179 dout("osize %u / su %u = su_per_object %u\n", osize, su,
5180 su_per_object);
5181
5182 - BUG_ON((su & ~PAGE_MASK) != 0);
5183 + if ((su & ~PAGE_MASK) != 0)
5184 + goto invalid;
5185 +
5186 /* bl = *off / su; */
5187 t = off;
5188 do_div(t, su);
5189 @@ -986,6 +1004,14 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
5190 *plen = *oxlen;
5191
5192 dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
5193 + return 0;
5194 +
5195 +invalid:
5196 + dout(" invalid layout\n");
5197 + *ono = 0;
5198 + *oxoff = 0;
5199 + *oxlen = 0;
5200 + return -EINVAL;
5201 }
5202 EXPORT_SYMBOL(ceph_calc_file_object_mapping);
5203
5204 diff --git a/net/core/dev.c b/net/core/dev.c
5205 index 24a21f3..eb858dc 100644
5206 --- a/net/core/dev.c
5207 +++ b/net/core/dev.c
5208 @@ -2763,8 +2763,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
5209 if (unlikely(tcpu != next_cpu) &&
5210 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
5211 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
5212 - rflow->last_qtail)) >= 0))
5213 + rflow->last_qtail)) >= 0)) {
5214 + tcpu = next_cpu;
5215 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
5216 + }
5217
5218 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
5219 *rflowp = rflow;
5220 diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
5221 index 626698f..76f6d0b 100644
5222 --- a/net/core/dev_addr_lists.c
5223 +++ b/net/core/dev_addr_lists.c
5224 @@ -308,7 +308,8 @@ int dev_addr_del(struct net_device *dev, unsigned char *addr,
5225 */
5226 ha = list_first_entry(&dev->dev_addrs.list,
5227 struct netdev_hw_addr, list);
5228 - if (ha->addr == dev->dev_addr && ha->refcount == 1)
5229 + if (!memcmp(ha->addr, addr, dev->addr_len) &&
5230 + ha->type == addr_type && ha->refcount == 1)
5231 return -ENOENT;
5232
5233 err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
5234 diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
5235 index 2fd0fba..59ef40a 100644
5236 --- a/net/ipv4/ip_sockglue.c
5237 +++ b/net/ipv4/ip_sockglue.c
5238 @@ -456,19 +456,28 @@ static int do_ip_setsockopt(struct sock *sk, int level,
5239 struct inet_sock *inet = inet_sk(sk);
5240 int val = 0, err;
5241
5242 - if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) |
5243 - (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
5244 - (1<<IP_RETOPTS) | (1<<IP_TOS) |
5245 - (1<<IP_TTL) | (1<<IP_HDRINCL) |
5246 - (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
5247 - (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
5248 - (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
5249 - (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
5250 - optname == IP_UNICAST_IF ||
5251 - optname == IP_MULTICAST_TTL ||
5252 - optname == IP_MULTICAST_ALL ||
5253 - optname == IP_MULTICAST_LOOP ||
5254 - optname == IP_RECVORIGDSTADDR) {
5255 + switch (optname) {
5256 + case IP_PKTINFO:
5257 + case IP_RECVTTL:
5258 + case IP_RECVOPTS:
5259 + case IP_RECVTOS:
5260 + case IP_RETOPTS:
5261 + case IP_TOS:
5262 + case IP_TTL:
5263 + case IP_HDRINCL:
5264 + case IP_MTU_DISCOVER:
5265 + case IP_RECVERR:
5266 + case IP_ROUTER_ALERT:
5267 + case IP_FREEBIND:
5268 + case IP_PASSSEC:
5269 + case IP_TRANSPARENT:
5270 + case IP_MINTTL:
5271 + case IP_NODEFRAG:
5272 + case IP_UNICAST_IF:
5273 + case IP_MULTICAST_TTL:
5274 + case IP_MULTICAST_ALL:
5275 + case IP_MULTICAST_LOOP:
5276 + case IP_RECVORIGDSTADDR:
5277 if (optlen >= sizeof(int)) {
5278 if (get_user(val, (int __user *) optval))
5279 return -EFAULT;
5280 diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
5281 index 3828a42..da4098f 100644
5282 --- a/net/ipv4/netfilter/nf_nat_standalone.c
5283 +++ b/net/ipv4/netfilter/nf_nat_standalone.c
5284 @@ -194,7 +194,8 @@ nf_nat_out(unsigned int hooknum,
5285
5286 if ((ct->tuplehash[dir].tuple.src.u3.ip !=
5287 ct->tuplehash[!dir].tuple.dst.u3.ip) ||
5288 - (ct->tuplehash[dir].tuple.src.u.all !=
5289 + (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
5290 + ct->tuplehash[dir].tuple.src.u.all !=
5291 ct->tuplehash[!dir].tuple.dst.u.all)
5292 )
5293 return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP;
5294 @@ -230,7 +231,8 @@ nf_nat_local_fn(unsigned int hooknum,
5295 ret = NF_DROP;
5296 }
5297 #ifdef CONFIG_XFRM
5298 - else if (ct->tuplehash[dir].tuple.dst.u.all !=
5299 + else if (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
5300 + ct->tuplehash[dir].tuple.dst.u.all !=
5301 ct->tuplehash[!dir].tuple.src.u.all)
5302 if (ip_xfrm_me_harder(skb))
5303 ret = NF_DROP;
5304 diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
5305 index 63dd1f8..34c1109 100644
5306 --- a/net/ipv6/ipv6_sockglue.c
5307 +++ b/net/ipv6/ipv6_sockglue.c
5308 @@ -828,6 +828,7 @@ pref_skip_coa:
5309 if (val < 0 || val > 255)
5310 goto e_inval;
5311 np->min_hopcount = val;
5312 + retv = 0;
5313 break;
5314 case IPV6_DONTFRAG:
5315 np->dontfrag = valbool;
5316 diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
5317 index db8fae5..498e87b 100644
5318 --- a/net/mac80211/ieee80211_i.h
5319 +++ b/net/mac80211/ieee80211_i.h
5320 @@ -1297,6 +1297,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
5321 struct net_device *dev);
5322 netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
5323 struct net_device *dev);
5324 +void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
5325 + struct sk_buff_head *skbs);
5326
5327 /* HT */
5328 bool ieee80111_cfg_override_disables_ht40(struct ieee80211_sub_if_data *sdata);
5329 diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
5330 index d93d39b..6d25d77 100644
5331 --- a/net/mac80211/sta_info.c
5332 +++ b/net/mac80211/sta_info.c
5333 @@ -738,8 +738,8 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
5334
5335 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
5336 local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]);
5337 - __skb_queue_purge(&sta->ps_tx_buf[ac]);
5338 - __skb_queue_purge(&sta->tx_filtered[ac]);
5339 + ieee80211_purge_tx_queue(&local->hw, &sta->ps_tx_buf[ac]);
5340 + ieee80211_purge_tx_queue(&local->hw, &sta->tx_filtered[ac]);
5341 }
5342
5343 #ifdef CONFIG_MAC80211_MESH
5344 @@ -774,7 +774,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
5345 tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]);
5346 if (!tid_tx)
5347 continue;
5348 - __skb_queue_purge(&tid_tx->pending);
5349 + ieee80211_purge_tx_queue(&local->hw, &tid_tx->pending);
5350 kfree(tid_tx);
5351 }
5352
5353 @@ -959,6 +959,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
5354 struct ieee80211_local *local = sdata->local;
5355 struct sk_buff_head pending;
5356 int filtered = 0, buffered = 0, ac;
5357 + unsigned long flags;
5358
5359 clear_sta_flag(sta, WLAN_STA_SP);
5360
5361 @@ -974,12 +975,16 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
5362 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
5363 int count = skb_queue_len(&pending), tmp;
5364
5365 + spin_lock_irqsave(&sta->tx_filtered[ac].lock, flags);
5366 skb_queue_splice_tail_init(&sta->tx_filtered[ac], &pending);
5367 + spin_unlock_irqrestore(&sta->tx_filtered[ac].lock, flags);
5368 tmp = skb_queue_len(&pending);
5369 filtered += tmp - count;
5370 count = tmp;
5371
5372 + spin_lock_irqsave(&sta->ps_tx_buf[ac].lock, flags);
5373 skb_queue_splice_tail_init(&sta->ps_tx_buf[ac], &pending);
5374 + spin_unlock_irqrestore(&sta->ps_tx_buf[ac].lock, flags);
5375 tmp = skb_queue_len(&pending);
5376 buffered += tmp - count;
5377 }
5378 diff --git a/net/mac80211/status.c b/net/mac80211/status.c
5379 index 5f8f89e..47b117f 100644
5380 --- a/net/mac80211/status.c
5381 +++ b/net/mac80211/status.c
5382 @@ -660,3 +660,12 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb)
5383 dev_kfree_skb_any(skb);
5384 }
5385 EXPORT_SYMBOL(ieee80211_free_txskb);
5386 +
5387 +void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
5388 + struct sk_buff_head *skbs)
5389 +{
5390 + struct sk_buff *skb;
5391 +
5392 + while ((skb = __skb_dequeue(skbs)))
5393 + ieee80211_free_txskb(hw, skb);
5394 +}
5395 diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
5396 index e76facc..eace766 100644
5397 --- a/net/mac80211/tx.c
5398 +++ b/net/mac80211/tx.c
5399 @@ -1357,7 +1357,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
5400 if (tx->skb)
5401 dev_kfree_skb(tx->skb);
5402 else
5403 - __skb_queue_purge(&tx->skbs);
5404 + ieee80211_purge_tx_queue(&tx->local->hw, &tx->skbs);
5405 return -1;
5406 } else if (unlikely(res == TX_QUEUED)) {
5407 I802_DEBUG_INC(tx->local->tx_handlers_queued);
5408 @@ -2126,10 +2126,13 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
5409 */
5410 void ieee80211_clear_tx_pending(struct ieee80211_local *local)
5411 {
5412 + struct sk_buff *skb;
5413 int i;
5414
5415 - for (i = 0; i < local->hw.queues; i++)
5416 - skb_queue_purge(&local->pending[i]);
5417 + for (i = 0; i < local->hw.queues; i++) {
5418 + while ((skb = skb_dequeue(&local->pending[i])) != NULL)
5419 + ieee80211_free_txskb(&local->hw, skb);
5420 + }
5421 }
5422
5423 /*
5424 diff --git a/net/mac80211/util.c b/net/mac80211/util.c
5425 index 266d092..73ef163 100644
5426 --- a/net/mac80211/util.c
5427 +++ b/net/mac80211/util.c
5428 @@ -1341,6 +1341,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
5429 list_for_each_entry(sdata, &local->interfaces, list) {
5430 if (sdata->vif.type != NL80211_IFTYPE_STATION)
5431 continue;
5432 + if (!sdata->u.mgd.associated)
5433 + continue;
5434
5435 ieee80211_send_nullfunc(local, sdata, 0);
5436 }
5437 diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
5438 index 0d07a1d..e022123 100644
5439 --- a/net/netfilter/nf_conntrack_proto_tcp.c
5440 +++ b/net/netfilter/nf_conntrack_proto_tcp.c
5441 @@ -158,21 +158,18 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
5442 * sCL -> sSS
5443 */
5444 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
5445 -/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
5446 +/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
5447 /*
5448 * sNO -> sIV Too late and no reason to do anything
5449 * sSS -> sIV Client can't send SYN and then SYN/ACK
5450 * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
5451 - * sSR -> sIG
5452 - * sES -> sIG Error: SYNs in window outside the SYN_SENT state
5453 - * are errors. Receiver will reply with RST
5454 - * and close the connection.
5455 - * Or we are not in sync and hold a dead connection.
5456 - * sFW -> sIG
5457 - * sCW -> sIG
5458 - * sLA -> sIG
5459 - * sTW -> sIG
5460 - * sCL -> sIG
5461 + * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open
5462 + * sES -> sIV Invalid SYN/ACK packets sent by the client
5463 + * sFW -> sIV
5464 + * sCW -> sIV
5465 + * sLA -> sIV
5466 + * sTW -> sIV
5467 + * sCL -> sIV
5468 */
5469 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
5470 /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
5471 @@ -627,15 +624,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
5472 ack = sack = receiver->td_end;
5473 }
5474
5475 - if (seq == end
5476 - && (!tcph->rst
5477 - || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
5478 + if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
5479 /*
5480 - * Packets contains no data: we assume it is valid
5481 - * and check the ack value only.
5482 - * However RST segments are always validated by their
5483 - * SEQ number, except when seq == 0 (reset sent answering
5484 - * SYN.
5485 + * RST sent answering SYN.
5486 */
5487 seq = end = sender->td_end;
5488
5489 diff --git a/net/wireless/reg.c b/net/wireless/reg.c
5490 index b01449f..4dc8347 100644
5491 --- a/net/wireless/reg.c
5492 +++ b/net/wireless/reg.c
5493 @@ -134,9 +134,8 @@ static const struct ieee80211_regdomain world_regdom = {
5494 .reg_rules = {
5495 /* IEEE 802.11b/g, channels 1..11 */
5496 REG_RULE(2412-10, 2462+10, 40, 6, 20, 0),
5497 - /* IEEE 802.11b/g, channels 12..13. No HT40
5498 - * channel fits here. */
5499 - REG_RULE(2467-10, 2472+10, 20, 6, 20,
5500 + /* IEEE 802.11b/g, channels 12..13. */
5501 + REG_RULE(2467-10, 2472+10, 40, 6, 20,
5502 NL80211_RRF_PASSIVE_SCAN |
5503 NL80211_RRF_NO_IBSS),
5504 /* IEEE 802.11 channel 14 - Only JP enables
5505 diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c
5506 index 8636585..04aa5c8 100644
5507 --- a/security/selinux/netnode.c
5508 +++ b/security/selinux/netnode.c
5509 @@ -174,7 +174,8 @@ static void sel_netnode_insert(struct sel_netnode *node)
5510 if (sel_netnode_hash[idx].size == SEL_NETNODE_HASH_BKT_LIMIT) {
5511 struct sel_netnode *tail;
5512 tail = list_entry(
5513 - rcu_dereference(sel_netnode_hash[idx].list.prev),
5514 + rcu_dereference_protected(sel_netnode_hash[idx].list.prev,
5515 + lockdep_is_held(&sel_netnode_lock)),
5516 struct sel_netnode, list);
5517 list_del_rcu(&tail->list);
5518 kfree_rcu(tail, rcu);
5519 diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
5520 index 7143393..e23ad3f 100644
5521 --- a/sound/pci/hda/patch_analog.c
5522 +++ b/sound/pci/hda/patch_analog.c
5523 @@ -544,6 +544,7 @@ static int ad198x_build_pcms(struct hda_codec *codec)
5524 if (spec->multiout.dig_out_nid) {
5525 info++;
5526 codec->num_pcms++;
5527 + codec->spdif_status_reset = 1;
5528 info->name = "AD198x Digital";
5529 info->pcm_type = HDA_PCM_TYPE_SPDIF;
5530 info->stream[SNDRV_PCM_STREAM_PLAYBACK] = ad198x_pcm_digital_playback;
5531 diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
5532 index 2bc6c51..057f95a2 100644
5533 --- a/sound/pci/hda/patch_cirrus.c
5534 +++ b/sound/pci/hda/patch_cirrus.c
5535 @@ -95,8 +95,8 @@ enum {
5536 #define CS420X_VENDOR_NID 0x11
5537 #define CS_DIG_OUT1_PIN_NID 0x10
5538 #define CS_DIG_OUT2_PIN_NID 0x15
5539 -#define CS_DMIC1_PIN_NID 0x12
5540 -#define CS_DMIC2_PIN_NID 0x0e
5541 +#define CS_DMIC1_PIN_NID 0x0e
5542 +#define CS_DMIC2_PIN_NID 0x12
5543
5544 /* coef indices */
5545 #define IDX_SPDIF_STAT 0x0000
5546 @@ -1084,14 +1084,18 @@ static void init_input(struct hda_codec *codec)
5547 cs_automic(codec);
5548
5549 coef = 0x000a; /* ADC1/2 - Digital and Analog Soft Ramp */
5550 + cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
5551 +
5552 + coef = cs_vendor_coef_get(codec, IDX_BEEP_CFG);
5553 if (is_active_pin(codec, CS_DMIC2_PIN_NID))
5554 - coef |= 0x0500; /* DMIC2 2 chan on, GPIO1 off */
5555 + coef |= 1 << 4; /* DMIC2 2 chan on, GPIO1 off */
5556 if (is_active_pin(codec, CS_DMIC1_PIN_NID))
5557 - coef |= 0x1800; /* DMIC1 2 chan on, GPIO0 off
5558 + coef |= 1 << 3; /* DMIC1 2 chan on, GPIO0 off
5559 * No effect if SPDIF_OUT2 is
5560 * selected in IDX_SPDIF_CTL.
5561 */
5562 - cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
5563 +
5564 + cs_vendor_coef_set(codec, IDX_BEEP_CFG, coef);
5565 } else {
5566 if (spec->mic_detect)
5567 cs_automic(codec);
5568 @@ -1112,7 +1116,7 @@ static const struct hda_verb cs_coef_init_verbs[] = {
5569 | 0x0400 /* Disable Coefficient Auto increment */
5570 )},
5571 /* Beep */
5572 - {0x11, AC_VERB_SET_COEF_INDEX, IDX_DAC_CFG},
5573 + {0x11, AC_VERB_SET_COEF_INDEX, IDX_BEEP_CFG},
5574 {0x11, AC_VERB_SET_PROC_COEF, 0x0007}, /* Enable Beep thru DAC1/2/3 */
5575
5576 {} /* terminator */
5577 diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
5578 index 6ecf1d4..257fe87 100644
5579 --- a/sound/pci/hda/patch_realtek.c
5580 +++ b/sound/pci/hda/patch_realtek.c
5581 @@ -5458,6 +5458,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
5582 SND_PCI_QUIRK(0x106b, 0x4000, "MacbookPro 5,1", ALC889_FIXUP_IMAC91_VREF),
5583 SND_PCI_QUIRK(0x106b, 0x4100, "Macmini 3,1", ALC889_FIXUP_IMAC91_VREF),
5584 SND_PCI_QUIRK(0x106b, 0x4200, "Mac Pro 5,1", ALC885_FIXUP_MACPRO_GPIO),
5585 + SND_PCI_QUIRK(0x106b, 0x4300, "iMac 9,1", ALC889_FIXUP_IMAC91_VREF),
5586 SND_PCI_QUIRK(0x106b, 0x4600, "MacbookPro 5,2", ALC889_FIXUP_IMAC91_VREF),
5587 SND_PCI_QUIRK(0x106b, 0x4900, "iMac 9,1 Aluminum", ALC889_FIXUP_IMAC91_VREF),
5588 SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_IMAC91_VREF),
5589 @@ -7047,6 +7048,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = {
5590 .patch = patch_alc662 },
5591 { .id = 0x10ec0663, .name = "ALC663", .patch = patch_alc662 },
5592 { .id = 0x10ec0665, .name = "ALC665", .patch = patch_alc662 },
5593 + { .id = 0x10ec0668, .name = "ALC668", .patch = patch_alc662 },
5594 { .id = 0x10ec0670, .name = "ALC670", .patch = patch_alc662 },
5595 { .id = 0x10ec0680, .name = "ALC680", .patch = patch_alc680 },
5596 { .id = 0x10ec0880, .name = "ALC880", .patch = patch_alc880 },
5597 @@ -7064,6 +7066,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = {
5598 { .id = 0x10ec0889, .name = "ALC889", .patch = patch_alc882 },
5599 { .id = 0x10ec0892, .name = "ALC892", .patch = patch_alc662 },
5600 { .id = 0x10ec0899, .name = "ALC898", .patch = patch_alc882 },
5601 + { .id = 0x10ec0900, .name = "ALC1150", .patch = patch_alc882 },
5602 {} /* terminator */
5603 };
5604
5605 diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
5606 index 3998d09b..9dafacd 100644
5607 --- a/sound/pci/hda/patch_via.c
5608 +++ b/sound/pci/hda/patch_via.c
5609 @@ -1868,11 +1868,11 @@ static int via_auto_fill_dac_nids(struct hda_codec *codec)
5610 {
5611 struct via_spec *spec = codec->spec;
5612 const struct auto_pin_cfg *cfg = &spec->autocfg;
5613 - int i, dac_num;
5614 + int i;
5615 hda_nid_t nid;
5616
5617 + spec->multiout.num_dacs = 0;
5618 spec->multiout.dac_nids = spec->private_dac_nids;
5619 - dac_num = 0;
5620 for (i = 0; i < cfg->line_outs; i++) {
5621 hda_nid_t dac = 0;
5622 nid = cfg->line_out_pins[i];
5623 @@ -1883,16 +1883,13 @@ static int via_auto_fill_dac_nids(struct hda_codec *codec)
5624 if (!i && parse_output_path(codec, nid, dac, 1,
5625 &spec->out_mix_path))
5626 dac = spec->out_mix_path.path[0];
5627 - if (dac) {
5628 - spec->private_dac_nids[i] = dac;
5629 - dac_num++;
5630 - }
5631 + if (dac)
5632 + spec->private_dac_nids[spec->multiout.num_dacs++] = dac;
5633 }
5634 if (!spec->out_path[0].depth && spec->out_mix_path.depth) {
5635 spec->out_path[0] = spec->out_mix_path;
5636 spec->out_mix_path.depth = 0;
5637 }
5638 - spec->multiout.num_dacs = dac_num;
5639 return 0;
5640 }
5641
5642 @@ -3668,6 +3665,18 @@ static void set_widgets_power_state_vt2002P(struct hda_codec *codec)
5643 update_power_state(codec, 0x21, AC_PWRST_D3);
5644 }
5645
5646 +/* NIDs 0x24 and 0x33 on VT1802 have connections to non-existing NID 0x3e
5647 + * Replace this with mixer NID 0x1c
5648 + */
5649 +static void fix_vt1802_connections(struct hda_codec *codec)
5650 +{
5651 + static hda_nid_t conn_24[] = { 0x14, 0x1c };
5652 + static hda_nid_t conn_33[] = { 0x1c };
5653 +
5654 + snd_hda_override_conn_list(codec, 0x24, ARRAY_SIZE(conn_24), conn_24);
5655 + snd_hda_override_conn_list(codec, 0x33, ARRAY_SIZE(conn_33), conn_33);
5656 +}
5657 +
5658 /* patch for vt2002P */
5659 static int patch_vt2002P(struct hda_codec *codec)
5660 {
5661 @@ -3682,6 +3691,8 @@ static int patch_vt2002P(struct hda_codec *codec)
5662 spec->aa_mix_nid = 0x21;
5663 override_mic_boost(codec, 0x2b, 0, 3, 40);
5664 override_mic_boost(codec, 0x29, 0, 3, 40);
5665 + if (spec->codec_type == VT1802)
5666 + fix_vt1802_connections(codec);
5667 add_secret_dac_path(codec);
5668
5669 /* automatic parse from the BIOS config */
5670 diff --git a/sound/soc/codecs/wm8978.c b/sound/soc/codecs/wm8978.c
5671 index 72d5fdc..6c37c7c 100644
5672 --- a/sound/soc/codecs/wm8978.c
5673 +++ b/sound/soc/codecs/wm8978.c
5674 @@ -783,7 +783,7 @@ static int wm8978_hw_params(struct snd_pcm_substream *substream,
5675 wm8978->mclk_idx = -1;
5676 f_sel = wm8978->f_mclk;
5677 } else {
5678 - if (!wm8978->f_pllout) {
5679 + if (!wm8978->f_opclk) {
5680 /* We only enter here, if OPCLK is not used */
5681 int ret = wm8978_configure_pll(codec);
5682 if (ret < 0)
5683 diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
5684 index c41efe0..9ae82a4 100644
5685 --- a/sound/soc/soc-dapm.c
5686 +++ b/sound/soc/soc-dapm.c
5687 @@ -3253,7 +3253,7 @@ void snd_soc_dapm_shutdown(struct snd_soc_card *card)
5688 {
5689 struct snd_soc_codec *codec;
5690
5691 - list_for_each_entry(codec, &card->codec_dev_list, list) {
5692 + list_for_each_entry(codec, &card->codec_dev_list, card_list) {
5693 soc_dapm_shutdown_codec(&codec->dapm);
5694 if (codec->dapm.bias_level == SND_SOC_BIAS_STANDBY)
5695 snd_soc_dapm_set_bias_level(&codec->dapm,
